45 files changed, 3846 insertions, 490 deletions
diff --git a/.travis.yml b/.travis.yml
index aeb9b211cd..279658b116 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ addons:
       - libseccomp-dev
       - libspice-protocol-dev
       - libspice-server-dev
-      - libssh2-1-dev
+      - libssh-dev
       - liburcu-dev
       - libusb-1.0-0-dev
       - libvte-2.91-dev
@@ -270,7 +270,7 @@ matrix:
             - libseccomp-dev
             - libspice-protocol-dev
             - libspice-server-dev
-            - libssh2-1-dev
+            - libssh-dev
             - liburcu-dev
             - libusb-1.0-0-dev
             - libvte-2.91-dev
diff --git a/block/Makefile.objs b/block/Makefile.objs
index dbd1522722..35f3bca4d9 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_VXHS) += vxhs.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-$(CONFIG_LIBSSH) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
@@ -52,8 +52,8 @@ rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
 vxhs.o-libs        := $(VXHS_LIBS)
-ssh.o-cflags       := $(LIBSSH2_CFLAGS)
-ssh.o-libs         := $(LIBSSH2_LIBS)
+ssh.o-cflags       := $(LIBSSH_CFLAGS)
+ssh.o-libs         := $(LIBSSH_LIBS)
 block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
 block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
 dmg-bz2.o-libs     := $(BZIP2_LIBS)
diff --git a/block/ssh.c b/block/ssh.c
index 6da7b9cbfe..501933b855 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -24,8 +24,8 @@
 
 #include "qemu/osdep.h"
 
-#include <libssh2.h>
-#include <libssh2_sftp.h>
+#include <libssh/libssh.h>
+#include <libssh/sftp.h>
 
 #include "block/block_int.h"
 #include "block/qdict.h"
@@ -46,13 +46,11 @@
 #include "trace.h"
 
 /*
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself.  Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself.  The meaning of <bitmask> is described
- * here: http://www.libssh2.org/libssh2_trace.html
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
+ * The meaning of <level> is described here:
+ * http://api.libssh.org/master/group__libssh__log.html
  */
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+#define TRACE_LIBSSH  0 /* see: SSH_LOG_* */
 
 typedef struct BDRVSSHState {
     /* Coroutine. */
@@ -60,18 +58,15 @@ typedef struct BDRVSSHState {
 
     /* SSH connection. */
     int sock;                         /* socket */
-    LIBSSH2_SESSION *session;         /* ssh session */
-    LIBSSH2_SFTP *sftp;               /* sftp session */
-    LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
+    ssh_session session;              /* ssh session */
+    sftp_session sftp;                /* sftp session */
+    sftp_file sftp_handle;            /* sftp remote file handle */
 
-    /* See ssh_seek() function below. */
-    int64_t offset;
-    bool offset_op_read;
-
-    /* File attributes at open.  We try to keep the .filesize field
+    /*
+     * File attributes at open.  We try to keep the .size field
      * updated if it changes (eg by writing at the end of the file).
      */
-    LIBSSH2_SFTP_ATTRIBUTES attrs;
+    sftp_attributes attrs;
 
     InetSocketAddress *inet;
 
@@ -91,7 +86,6 @@ static void ssh_state_init(BDRVSSHState *s)
 {
     memset(s, 0, sizeof *s);
     s->sock = -1;
-    s->offset = -1;
     qemu_co_mutex_init(&s->lock);
 }
 
@@ -99,20 +93,18 @@ static void ssh_state_free(BDRVSSHState *s)
 {
     g_free(s->user);
 
+    if (s->attrs) {
+        sftp_attributes_free(s->attrs);
+    }
     if (s->sftp_handle) {
-        libssh2_sftp_close(s->sftp_handle);
+        sftp_close(s->sftp_handle);
     }
     if (s->sftp) {
-        libssh2_sftp_shutdown(s->sftp);
+        sftp_free(s->sftp);
     }
     if (s->session) {
-        libssh2_session_disconnect(s->session,
-                                   "from qemu ssh client: "
-                                   "user closed the connection");
-        libssh2_session_free(s->session);
-    }
-    if (s->sock >= 0) {
-        close(s->sock);
+        ssh_disconnect(s->session);
+        ssh_free(s->session); /* This frees s->sock */
     }
 }
 
@@ -127,13 +119,13 @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
     va_end(args);
 
     if (s->session) {
-        char *ssh_err;
+        const char *ssh_err;
         int ssh_err_code;
 
-        /* This is not an errno.  See <libssh2.h>. */
-        ssh_err_code = libssh2_session_last_error(s->session,
-                                                  &ssh_err, NULL, 0);
-        error_setg(errp, "%s: %s (libssh2 error code: %d)",
+        /* This is not an errno.  See <libssh/libssh.h>. */
+        ssh_err = ssh_get_error(s->session);
+        ssh_err_code = ssh_get_error_code(s->session);
+        error_setg(errp, "%s: %s (libssh error code: %d)",
                    msg, ssh_err, ssh_err_code);
     } else {
         error_setg(errp, "%s", msg);
@@ -152,18 +144,18 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
     va_end(args);
 
     if (s->sftp) {
-        char *ssh_err;
+        const char *ssh_err;
         int ssh_err_code;
-        unsigned long sftp_err_code;
+        int sftp_err_code;
 
-        /* This is not an errno.  See <libssh2.h>. */
-        ssh_err_code = libssh2_session_last_error(s->session,
-                                                  &ssh_err, NULL, 0);
-        /* See <libssh2_sftp.h>. */
-        sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+        /* This is not an errno.  See <libssh/libssh.h>. */
+        ssh_err = ssh_get_error(s->session);
+        ssh_err_code = ssh_get_error_code(s->session);
+        /* See <libssh/sftp.h>. */
+        sftp_err_code = sftp_get_error(s->sftp);
 
         error_setg(errp,
-                   "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
+                   "%s: %s (libssh error code: %d, sftp error code: %d)",
                    msg, ssh_err, ssh_err_code, sftp_err_code);
     } else {
         error_setg(errp, "%s", msg);
@@ -173,15 +165,15 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
 
 static void sftp_error_trace(BDRVSSHState *s, const char *op)
 {
-    char *ssh_err;
+    const char *ssh_err;
     int ssh_err_code;
-    unsigned long sftp_err_code;
+    int sftp_err_code;
 
-    /* This is not an errno.  See <libssh2.h>. */
-    ssh_err_code = libssh2_session_last_error(s->session,
-                                              &ssh_err, NULL, 0);
-    /* See <libssh2_sftp.h>. */
-    sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+    /* This is not an errno.  See <libssh/libssh.h>. */
+    ssh_err = ssh_get_error(s->session);
+    ssh_err_code = ssh_get_error_code(s->session);
+    /* See <libssh/sftp.h>. */
+    sftp_err_code = sftp_get_error(s->sftp);
 
     trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
 }
@@ -282,82 +274,120 @@ static void ssh_parse_filename(const char *filename, QDict *options,
     parse_uri(filename, options, errp);
 }
 
-static int check_host_key_knownhosts(BDRVSSHState *s,
-                                     const char *host, int port, Error **errp)
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
 {
-    const char *home;
-    char *knh_file = NULL;
-    LIBSSH2_KNOWNHOSTS *knh = NULL;
-    struct libssh2_knownhost *found;
-    int ret, r;
-    const char *hostkey;
-    size_t len;
-    int type;
-
-    hostkey = libssh2_session_hostkey(s->session, &len, &type);
-    if (!hostkey) {
+    int ret;
+#ifdef HAVE_LIBSSH_0_8
+    enum ssh_known_hosts_e state;
+    int r;
+    ssh_key pubkey;
+    enum ssh_keytypes_e pubkey_type;
+    unsigned char *server_hash = NULL;
+    size_t server_hash_len;
+    char *fingerprint = NULL;
+
+    state = ssh_session_is_known_server(s->session);
+    trace_ssh_server_status(state);
+
+    switch (state) {
+    case SSH_KNOWN_HOSTS_OK:
+        /* OK */
+        trace_ssh_check_host_key_knownhosts();
+        break;
+    case SSH_KNOWN_HOSTS_CHANGED:
         ret = -EINVAL;
-        session_error_setg(errp, s, "failed to read remote host key");
+        r = ssh_get_server_publickey(s->session, &pubkey);
+        if (r == 0) {
+            r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
+                                       &server_hash, &server_hash_len);
+            pubkey_type = ssh_key_type(pubkey);
+            ssh_key_free(pubkey);
+        }
+        if (r == 0) {
+            fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
+                                                   server_hash,
+                                                   server_hash_len);
+            ssh_clean_pubkey_hash(&server_hash);
+        }
+        if (fingerprint) {
+            error_setg(errp,
+                       "host key (%s key with fingerprint %s) does not match "
+                       "the one in known_hosts; this may be a possible attack",
+                       ssh_key_type_to_char(pubkey_type), fingerprint);
+            ssh_string_free_char(fingerprint);
+        } else  {
+            error_setg(errp,
+                       "host key does not match the one in known_hosts; this "
+                       "may be a possible attack");
+        }
         goto out;
-    }
-
-    knh = libssh2_knownhost_init(s->session);
-    if (!knh) {
+    case SSH_KNOWN_HOSTS_OTHER:
         ret = -EINVAL;
-        session_error_setg(errp, s,
-                           "failed to initialize known hosts support");
+        error_setg(errp,
+                   "host key for this server not found, another type exists");
+        goto out;
+    case SSH_KNOWN_HOSTS_UNKNOWN:
+        ret = -EINVAL;
+        error_setg(errp, "no host key was found in known_hosts");
+        goto out;
+    case SSH_KNOWN_HOSTS_NOT_FOUND:
+        ret = -ENOENT;
+        error_setg(errp, "known_hosts file not found");
+        goto out;
+    case SSH_KNOWN_HOSTS_ERROR:
+        ret = -EINVAL;
+        error_setg(errp, "error while checking the host");
+        goto out;
+    default:
+        ret = -EINVAL;
+        error_setg(errp, "error while checking for known server (%d)", state);
         goto out;
     }
+#else /* !HAVE_LIBSSH_0_8 */
+    int state;
 
-    home = getenv("HOME");
-    if (home) {
-        knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
-    } else {
-        knh_file = g_strdup_printf("/root/.ssh/known_hosts");
-    }
-
-    /* Read all known hosts from OpenSSH-style known_hosts file. */
-    libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
+    state = ssh_is_server_known(s->session);
+    trace_ssh_server_status(state);
 
-    r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
-                                 LIBSSH2_KNOWNHOST_TYPE_PLAIN|
-                                 LIBSSH2_KNOWNHOST_KEYENC_RAW,
-                                 &found);
-    switch (r) {
-    case LIBSSH2_KNOWNHOST_CHECK_MATCH:
+    switch (state) {
+    case SSH_SERVER_KNOWN_OK:
         /* OK */
-        trace_ssh_check_host_key_knownhosts(found->key);
+        trace_ssh_check_host_key_knownhosts();
         break;
-    case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
+    case SSH_SERVER_KNOWN_CHANGED:
         ret = -EINVAL;
-        session_error_setg(errp, s,
-                      "host key does not match the one in known_hosts"
-                      " (found key %s)", found->key);
+        error_setg(errp,
+                   "host key does not match the one in known_hosts; this "
+                   "may be a possible attack");
         goto out;
-    case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
+    case SSH_SERVER_FOUND_OTHER:
         ret = -EINVAL;
-        session_error_setg(errp, s, "no host key was found in known_hosts");
+        error_setg(errp,
+                   "host key for this server not found, another type exists");
+        goto out;
+    case SSH_SERVER_FILE_NOT_FOUND:
+        ret = -ENOENT;
+        error_setg(errp, "known_hosts file not found");
         goto out;
-    case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
+    case SSH_SERVER_NOT_KNOWN:
         ret = -EINVAL;
-        session_error_setg(errp, s,
-                      "failure matching the host key with known_hosts");
+        error_setg(errp, "no host key was found in known_hosts");
+        goto out;
+    case SSH_SERVER_ERROR:
+        ret = -EINVAL;
+        error_setg(errp, "server error");
         goto out;
     default:
         ret = -EINVAL;
-        session_error_setg(errp, s, "unknown error matching the host key"
-                      " with known_hosts (%d)", r);
+        error_setg(errp, "error while checking for known server (%d)", state);
         goto out;
     }
+#endif /* !HAVE_LIBSSH_0_8 */
 
     /* known_hosts checking successful. */
     ret = 0;
 
  out:
-    if (knh != NULL) {
-        libssh2_knownhost_free(knh);
-    }
-    g_free(knh_file);
     return ret;
 }
 
@@ -401,18 +431,34 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
 
 static int
 check_host_key_hash(BDRVSSHState *s, const char *hash,
-                    int hash_type, size_t fingerprint_len, Error **errp)
+                    enum ssh_publickey_hash_type type, Error **errp)
 {
-    const char *fingerprint;
-
-    fingerprint = libssh2_hostkey_hash(s->session, hash_type);
-    if (!fingerprint) {
+    int r;
+    ssh_key pubkey;
+    unsigned char *server_hash;
+    size_t server_hash_len;
+
+#ifdef HAVE_LIBSSH_0_8
+    r = ssh_get_server_publickey(s->session, &pubkey);
+#else
+    r = ssh_get_publickey(s->session, &pubkey);
+#endif
+    if (r != SSH_OK) {
         session_error_setg(errp, s, "failed to read remote host key");
         return -EINVAL;
     }
 
-    if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
-                           hash) != 0) {
+    r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
+    ssh_key_free(pubkey);
+    if (r != 0) {
+        session_error_setg(errp, s,
+                           "failed reading the hash of the server SSH key");
+        return -EINVAL;
+    }
+
+    r = compare_fingerprint(server_hash, server_hash_len, hash);
+    ssh_clean_pubkey_hash(&server_hash);
+    if (r != 0) {
         error_setg(errp, "remote host key does not match host_key_check '%s'",
                    hash);
         return -EPERM;
@@ -421,8 +467,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
     return 0;
 }
 
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
-                          SshHostKeyCheck *hkc, Error **errp)
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
 {
     SshHostKeyCheckMode mode;
 
@@ -438,15 +483,15 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
     case SSH_HOST_KEY_CHECK_MODE_HASH:
         if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
             return check_host_key_hash(s, hkc->u.hash.hash,
-                                       LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
+                                       SSH_PUBLICKEY_HASH_MD5, errp);
         } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
             return check_host_key_hash(s, hkc->u.hash.hash,
-                                       LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
+                                       SSH_PUBLICKEY_HASH_SHA1, errp);
         }
         g_assert_not_reached();
         break;
     case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
-        return check_host_key_knownhosts(s, host, port, errp);
+        return check_host_key_knownhosts(s, errp);
     default:
         g_assert_not_reached();
     }
@@ -454,60 +499,43 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
     return -EINVAL;
 }
 
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
+static int authenticate(BDRVSSHState *s, Error **errp)
 {
     int r, ret;
-    const char *userauthlist;
-    LIBSSH2_AGENT *agent = NULL;
-    struct libssh2_agent_publickey *identity;
-    struct libssh2_agent_publickey *prev_identity = NULL;
+    int method;
 
-    userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
-    if (strstr(userauthlist, "publickey") == NULL) {
+    /* Try to authenticate with the "none" method. */
+    r = ssh_userauth_none(s->session, NULL);
+    if (r == SSH_AUTH_ERROR) {
         ret = -EPERM;
-        error_setg(errp,
-                "remote server does not support \"publickey\" authentication");
+        session_error_setg(errp, s, "failed to authenticate using none "
+                                    "authentication");
         goto out;
-    }
-
-    /* Connect to ssh-agent and try each identity in turn. */
-    agent = libssh2_agent_init(s->session);
-    if (!agent) {
-        ret = -EINVAL;
-        session_error_setg(errp, s, "failed to initialize ssh-agent support");
-        goto out;
-    }
-    if (libssh2_agent_connect(agent)) {
-        ret = -ECONNREFUSED;
-        session_error_setg(errp, s, "failed to connect to ssh-agent");
-        goto out;
-    }
-    if (libssh2_agent_list_identities(agent)) {
-        ret = -EINVAL;
-        session_error_setg(errp, s,
-                           "failed requesting identities from ssh-agent");
+    } else if (r == SSH_AUTH_SUCCESS) {
+        /* Authenticated! */
+        ret = 0;
         goto out;
     }
 
-    for(;;) {
-        r = libssh2_agent_get_identity(agent, &identity, prev_identity);
-        if (r == 1) {           /* end of list */
-            break;
-        }
-        if (r < 0) {
+    method = ssh_userauth_list(s->session, NULL);
+    trace_ssh_auth_methods(method);
+
+    /*
+     * Try to authenticate with publickey, using the ssh-agent
+     * if available.
+     */
+    if (method & SSH_AUTH_METHOD_PUBLICKEY) {
+        r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
+        if (r == SSH_AUTH_ERROR) {
             ret = -EINVAL;
-            session_error_setg(errp, s,
-                               "failed to obtain identity from ssh-agent");
+            session_error_setg(errp, s, "failed to authenticate using "
+                                        "publickey authentication");
             goto out;
-        }
-        r = libssh2_agent_userauth(agent, user, identity);
-        if (r == 0) {
+        } else if (r == SSH_AUTH_SUCCESS) {
             /* Authenticated! */
             ret = 0;
             goto out;
         }
-        /* Failed to authenticate with this identity, try the next one. */
-        prev_identity = identity;
     }
 
     ret = -EPERM;
@@ -515,13 +543,6 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
                "and the identities held by your ssh-agent");
 
  out:
-    if (agent != NULL) {
-        /* Note: libssh2 implementation implicitly calls
-         * libssh2_agent_disconnect if necessary.
-         */
-        libssh2_agent_free(agent);
-    }
-
     return ret;
 }
 
@@ -640,7 +661,8 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
                           int ssh_flags, int creat_mode, Error **errp)
 {
     int r, ret;
-    long port = 0;
+    unsigned int port = 0;
+    int new_sock = -1;
 
     if (opts->has_user) {
         s->user = g_strdup(opts->user);
@@ -657,71 +679,147 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
     s->inet = opts->server;
     opts->server = NULL;
 
-    if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
+    if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
         error_setg(errp, "Use only numeric port value");
         ret = -EINVAL;
         goto err;
     }
 
     /* Open the socket and connect. */
-    s->sock = inet_connect_saddr(s->inet, errp);
-    if (s->sock < 0) {
+    new_sock = inet_connect_saddr(s->inet, errp);
+    if (new_sock < 0) {
         ret = -EIO;
         goto err;
     }
 
+    /*
+     * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
+     * but do not fail if it cannot be disabled.
+     */
+    r = socket_set_nodelay(new_sock);
+    if (r < 0) {
+        warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
+                    s->inet->host, strerror(errno));
+    }
+
     /* Create SSH session. */
-    s->session = libssh2_session_init();
+    s->session = ssh_new();
     if (!s->session) {
         ret = -EINVAL;
-        session_error_setg(errp, s, "failed to initialize libssh2 session");
+        session_error_setg(errp, s, "failed to initialize libssh session");
         goto err;
     }
 
-#if TRACE_LIBSSH2 != 0
-    libssh2_trace(s->session, TRACE_LIBSSH2);
-#endif
+    /*
+     * Make sure we are in blocking mode during the connection and
+     * authentication phases.
+     */
+    ssh_set_blocking(s->session, 1);
 
-    r = libssh2_session_handshake(s->session, s->sock);
-    if (r != 0) {
+    r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
+    if (r < 0) {
+        ret = -EINVAL;
+        session_error_setg(errp, s,
+                           "failed to set the user in the libssh session");
+        goto err;
+    }
+
+    r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
+    if (r < 0) {
+        ret = -EINVAL;
+        session_error_setg(errp, s,
+                           "failed to set the host in the libssh session");
+        goto err;
+    }
+
+    if (port > 0) {
+        r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
+        if (r < 0) {
+            ret = -EINVAL;
+            session_error_setg(errp, s,
+                               "failed to set the port in the libssh session");
+            goto err;
+        }
+    }
+
+    r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
+    if (r < 0) {
+        ret = -EINVAL;
+        session_error_setg(errp, s,
+                           "failed to disable the compression in the libssh "
+                           "session");
+        goto err;
+    }
+
+    /* Read ~/.ssh/config. */
+    r = ssh_options_parse_config(s->session, NULL);
+    if (r < 0) {
+        ret = -EINVAL;
+        session_error_setg(errp, s, "failed to parse ~/.ssh/config");
+        goto err;
+    }
+
+    r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
+    if (r < 0) {
+        ret = -EINVAL;
+        session_error_setg(errp, s,
+                           "failed to set the socket in the libssh session");
+        goto err;
+    }
+    /* libssh took ownership of the socket. */
+    s->sock = new_sock;
+    new_sock = -1;
+
+    /* Connect. */
+    r = ssh_connect(s->session);
+    if (r != SSH_OK) {
         ret = -EINVAL;
         session_error_setg(errp, s, "failed to establish SSH session");
         goto err;
     }
 
     /* Check the remote host's key against known_hosts. */
-    ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
+    ret = check_host_key(s, opts->host_key_check, errp);
     if (ret < 0) {
         goto err;
     }
 
     /* Authenticate. */
-    ret = authenticate(s, s->user, errp);
+    ret = authenticate(s, errp);
     if (ret < 0) {
         goto err;
     }
 
     /* Start SFTP. */
-    s->sftp = libssh2_sftp_init(s->session);
+    s->sftp = sftp_new(s->session);
     if (!s->sftp) {
-        session_error_setg(errp, s, "failed to initialize sftp handle");
+        session_error_setg(errp, s, "failed to create sftp handle");
+        ret = -EINVAL;
+        goto err;
+    }
+
+    r = sftp_init(s->sftp);
+    if (r < 0) {
+        sftp_error_setg(errp, s, "failed to initialize sftp handle");
         ret = -EINVAL;
         goto err;
     }
 
     /* Open the remote file. */
     trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
-    s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
-                                       creat_mode);
+    s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
     if (!s->sftp_handle) {
-        session_error_setg(errp, s, "failed to open remote file '%s'",
-                           opts->path);
+        sftp_error_setg(errp, s, "failed to open remote file '%s'",
+                        opts->path);
         ret = -EINVAL;
         goto err;
     }
 
-    r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
-    if (r < 0) {
+    /* Make sure the SFTP file is handled in blocking mode. */
+    sftp_file_set_blocking(s->sftp_handle);
+
+    s->attrs = sftp_fstat(s->sftp_handle);
+    if (!s->attrs) {
         sftp_error_setg(errp, s, "failed to read file attributes");
         return -EINVAL;
     }
@@ -729,21 +827,27 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
     return 0;
 
  err:
+    if (s->attrs) {
+        sftp_attributes_free(s->attrs);
+    }
+    s->attrs = NULL;
     if (s->sftp_handle) {
-        libssh2_sftp_close(s->sftp_handle);
+        sftp_close(s->sftp_handle);
     }
     s->sftp_handle = NULL;
     if (s->sftp) {
-        libssh2_sftp_shutdown(s->sftp);
+        sftp_free(s->sftp);
     }
     s->sftp = NULL;
     if (s->session) {
-        libssh2_session_disconnect(s->session,
-                                   "from qemu ssh client: "
-                                   "error opening connection");
-        libssh2_session_free(s->session);
+        ssh_disconnect(s->session);
+        ssh_free(s->session);
     }
     s->session = NULL;
+    s->sock = -1;
+    if (new_sock >= 0) {
+        close(new_sock);
+    }
 
     return ret;
 }
@@ -758,9 +862,11 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
 
     ssh_state_init(s);
 
-    ssh_flags = LIBSSH2_FXF_READ;
+    ssh_flags = 0;
     if (bdrv_flags & BDRV_O_RDWR) {
-        ssh_flags |= LIBSSH2_FXF_WRITE;
+        ssh_flags |= O_RDWR;
+    } else {
+        ssh_flags |= O_RDONLY;
     }
 
     opts = ssh_parse_options(options, errp);
@@ -775,18 +881,13 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
     }
 
     /* Go non-blocking. */
-    libssh2_session_set_blocking(s->session, 0);
+    ssh_set_blocking(s->session, 0);
 
     qapi_free_BlockdevOptionsSsh(opts);
 
     return 0;
 
  err:
-    if (s->sock >= 0) {
-        close(s->sock);
-    }
-    s->sock = -1;
-
     qapi_free_BlockdevOptionsSsh(opts);
 
     return ret;
@@ -797,25 +898,25 @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
 {
     ssize_t ret;
     char c[1] = { '\0' };
-    int was_blocking = libssh2_session_get_blocking(s->session);
+    int was_blocking = ssh_is_blocking(s->session);
 
     /* offset must be strictly greater than the current size so we do
      * not overwrite anything */
-    assert(offset > 0 && offset > s->attrs.filesize);
+    assert(offset > 0 && offset > s->attrs->size);
 
-    libssh2_session_set_blocking(s->session, 1);
+    ssh_set_blocking(s->session, 1);
 
-    libssh2_sftp_seek64(s->sftp_handle, offset - 1);
-    ret = libssh2_sftp_write(s->sftp_handle, c, 1);
+    sftp_seek64(s->sftp_handle, offset - 1);
+    ret = sftp_write(s->sftp_handle, c, 1);
 
-    libssh2_session_set_blocking(s->session, was_blocking);
+    ssh_set_blocking(s->session, was_blocking);
 
     if (ret < 0) {
         sftp_error_setg(errp, s, "Failed to grow file");
         return -EIO;
     }
 
-    s->attrs.filesize = offset;
+    s->attrs->size = offset;
     return 0;
 }
 
@@ -843,8 +944,7 @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
     ssh_state_init(&s);
 
     ret = connect_to_ssh(&s, opts->location,
-                         LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
-                         LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
+                         O_RDWR | O_CREAT | O_TRUNC,
                          0644, errp);
     if (ret < 0) {
         goto fail;
@@ -913,10 +1013,8 @@ static int ssh_has_zero_init(BlockDriverState *bs)
     /* Assume false, unless we can positively prove it's true. */
     int has_zero_init = 0;
 
-    if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
-        if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
-            has_zero_init = 1;
-        }
+    if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
+        has_zero_init = 1;
     }
 
     return has_zero_init;
@@ -953,12 +1051,12 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
         .co = qemu_coroutine_self()
     };
 
-    r = libssh2_session_block_directions(s->session);
+    r = ssh_get_poll_flags(s->session);
 
-    if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
+    if (r & SSH_READ_PENDING) {
         rd_handler = restart_coroutine;
     }
-    if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
+    if (r & SSH_WRITE_PENDING) {
         wr_handler = restart_coroutine;
     }
 
@@ -970,33 +1068,6 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
     trace_ssh_co_yield_back(s->sock);
 }
 
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
- * in the remote file.  Notice that it just updates a field in the
- * sftp_handle structure, so there is no network traffic and it cannot
- * fail.
- *
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
- * performance since it causes the handle to throw away all in-flight
- * reads and buffered readahead data.  Therefore this function tries
- * to be intelligent about when to call the underlying libssh2 function.
- */
-#define SSH_SEEK_WRITE 0
-#define SSH_SEEK_READ  1
-#define SSH_SEEK_FORCE 2
-
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
-{
-    bool op_read = (flags & SSH_SEEK_READ) != 0;
-    bool force = (flags & SSH_SEEK_FORCE) != 0;
-
-    if (force || op_read != s->offset_op_read || offset != s->offset) {
-        trace_ssh_seek(offset);
-        libssh2_sftp_seek64(s->sftp_handle, offset);
-        s->offset = offset;
-        s->offset_op_read = op_read;
-    }
-}
-
 static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
                                  int64_t offset, size_t size,
                                  QEMUIOVector *qiov)
@@ -1008,7 +1079,8 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
 
     trace_ssh_read(offset, size);
 
-    ssh_seek(s, offset, SSH_SEEK_READ);
+    trace_ssh_seek(offset);
+    sftp_seek64(s->sftp_handle, offset);
 
     /* This keeps track of the current iovec element ('i'), where we
      * will write to next ('buf'), and the end of the current iovec
@@ -1018,35 +1090,35 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
     buf = i->iov_base;
     end_of_vec = i->iov_base + i->iov_len;
 
-    /* libssh2 has a hard-coded limit of 2000 bytes per request,
-     * although it will also do readahead behind our backs.  Therefore
-     * we may have to do repeated reads here until we have read 'size'
-     * bytes.
-     */
     for (got = 0; got < size; ) {
+        size_t request_read_size;
     again:
-        trace_ssh_read_buf(buf, end_of_vec - buf);
-        r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
-        trace_ssh_read_return(r);
+        /*
+         * The size of SFTP packets is limited to 32K bytes, so limit
+         * the amount of data requested to 16K, as libssh currently
+         * does not handle multiple requests on its own.
+         */
+        request_read_size = MIN(end_of_vec - buf, 16384);
+        trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
+        r = sftp_read(s->sftp_handle, buf, request_read_size);
+        trace_ssh_read_return(r, sftp_get_error(s->sftp));
 
-        if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+        if (r == SSH_AGAIN) {
             co_yield(s, bs);
             goto again;
         }
-        if (r < 0) {
-            sftp_error_trace(s, "read");
-            s->offset = -1;
-            return -EIO;
-        }
-        if (r == 0) {
+        if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
             /* EOF: Short read so pad the buffer with zeroes and return it. */
             qemu_iovec_memset(qiov, got, 0, size - got);
             return 0;
         }
+        if (r <= 0) {
+            sftp_error_trace(s, "read");
+            return -EIO;
+        }
 
         got += r;
         buf += r;
-        s->offset += r;
         if (buf >= end_of_vec && got < size) {
             i++;
             buf = i->iov_base;
@@ -1083,7 +1155,8 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
 
     trace_ssh_write(offset, size);
 
-    ssh_seek(s, offset, SSH_SEEK_WRITE);
+    trace_ssh_seek(offset);
+    sftp_seek64(s->sftp_handle, offset);
 
     /* This keeps track of the current iovec element ('i'), where we
      * will read from next ('buf'), and the end of the current iovec
@@ -1094,46 +1167,37 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
     end_of_vec = i->iov_base + i->iov_len;
 
     for (written = 0; written < size; ) {
+        size_t request_write_size;
     again:
-        trace_ssh_write_buf(buf, end_of_vec - buf);
-        r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
-        trace_ssh_write_return(r);
+        /*
+         * Avoid too large data packets, as libssh currently does not
+         * handle multiple requests on its own.
+         */
+        request_write_size = MIN(end_of_vec - buf, 131072);
+        trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
+        r = sftp_write(s->sftp_handle, buf, request_write_size);
+        trace_ssh_write_return(r, sftp_get_error(s->sftp));
 
-        if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+        if (r == SSH_AGAIN) {
             co_yield(s, bs);
             goto again;
         }
         if (r < 0) {
             sftp_error_trace(s, "write");
-            s->offset = -1;
             return -EIO;
         }
-        /* The libssh2 API is very unclear about this.  A comment in
-         * the code says "nothing was acked, and no EAGAIN was
-         * received!" which apparently means that no data got sent
-         * out, and the underlying channel didn't return any EAGAIN
-         * indication.  I think this is a bug in either libssh2 or
-         * OpenSSH (server-side).  In any case, forcing a seek (to
-         * discard libssh2 internal buffers), and then trying again
-         * works for me.
-         */
-        if (r == 0) {
-            ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
-            co_yield(s, bs);
-            goto again;
-        }
 
         written += r;
         buf += r;
-        s->offset += r;
         if (buf >= end_of_vec && written < size) {
             i++;
             buf = i->iov_base;
             end_of_vec = i->iov_base + i->iov_len;
         }
 
-        if (offset + written > s->attrs.filesize)
-            s->attrs.filesize = offset + written;
+        if (offset + written > s->attrs->size) {
+            s->attrs->size = offset + written;
+        }
     }
 
     return 0;
@@ -1168,24 +1232,24 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
     }
 }
 
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
+#ifdef HAVE_LIBSSH_0_8
 
 static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
 {
     int r;
 
     trace_ssh_flush();
+
+    if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
+        unsafe_flush_warning(s, "OpenSSH >= 6.3");
+        return 0;
+    }
  again:
-    r = libssh2_sftp_fsync(s->sftp_handle);
-    if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+    r = sftp_fsync(s->sftp_handle);
+    if (r == SSH_AGAIN) {
         co_yield(s, bs);
         goto again;
     }
-    if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
-        libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
-        unsafe_flush_warning(s, "OpenSSH >= 6.3");
-        return 0;
-    }
     if (r < 0) {
         sftp_error_trace(s, "fsync");
         return -EIO;
@@ -1206,25 +1270,25 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
     return ret;
 }
 
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
+#else /* !HAVE_LIBSSH_0_8 */
 
 static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
 {
     BDRVSSHState *s = bs->opaque;
 
-    unsafe_flush_warning(s, "libssh2 >= 1.4.4");
+    unsafe_flush_warning(s, "libssh >= 0.8.0");
     return 0;
 }
 
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
+#endif /* !HAVE_LIBSSH_0_8 */
 
 static int64_t ssh_getlength(BlockDriverState *bs)
 {
     BDRVSSHState *s = bs->opaque;
     int64_t length;
 
-    /* Note we cannot make a libssh2 call here. */
-    length = (int64_t) s->attrs.filesize;
+    /* Note we cannot make a libssh call here. */
+    length = (int64_t) s->attrs->size;
     trace_ssh_getlength(length);
 
     return length;
@@ -1241,12 +1305,12 @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
         return -ENOTSUP;
     }
 
-    if (offset < s->attrs.filesize) {
+    if (offset < s->attrs->size) {
         error_setg(errp, "ssh driver does not support shrinking files");
         return -ENOTSUP;
     }
 
-    if (offset == s->attrs.filesize) {
+    if (offset == s->attrs->size) {
         return 0;
     }
 
@@ -1341,12 +1405,16 @@ static void bdrv_ssh_init(void)
 {
     int r;
 
-    r = libssh2_init(0);
+    r = ssh_init();
     if (r != 0) {
-        fprintf(stderr, "libssh2 initialization failed, %d\n", r);
+        fprintf(stderr, "libssh initialization failed, %d\n", r);
         exit(EXIT_FAILURE);
     }
 
+#if TRACE_LIBSSH != 0
+    ssh_set_log_level(TRACE_LIBSSH);
+#endif
+
     bdrv_register(&bdrv_ssh);
 }
 
diff --git a/block/trace-events b/block/trace-events
index 9ccea755da..d724df0117 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -171,19 +171,21 @@ nbd_client_connect_success(const char *export_name) "export '%s'"
 # ssh.c
 ssh_restart_coroutine(void *co) "co=%p"
 ssh_flush(void) "fsync"
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
+ssh_check_host_key_knownhosts(void) "host key OK"
 ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
 ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
 ssh_co_yield_back(int sock) "s->sock=%d - back"
 ssh_getlength(int64_t length) "length=%" PRIi64
 ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
 ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
 ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
 ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
+ssh_auth_methods(int methods) "auth methods=0x%x"
+ssh_server_status(int status) "server status=%d"
 
 # curl.c
 curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
@@ -216,4 +218,4 @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
 sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
 
 # ssh.c
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
diff --git a/block/vmdk.c b/block/vmdk.c
index 51067c774f..bd36ece125 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -91,6 +91,44 @@ typedef struct {
     uint16_t compressAlgorithm;
 } QEMU_PACKED VMDK4Header;
 
+typedef struct VMDKSESparseConstHeader {
+    uint64_t magic;
+    uint64_t version;
+    uint64_t capacity;
+    uint64_t grain_size;
+    uint64_t grain_table_size;
+    uint64_t flags;
+    uint64_t reserved1;
+    uint64_t reserved2;
+    uint64_t reserved3;
+    uint64_t reserved4;
+    uint64_t volatile_header_offset;
+    uint64_t volatile_header_size;
+    uint64_t journal_header_offset;
+    uint64_t journal_header_size;
+    uint64_t journal_offset;
+    uint64_t journal_size;
+    uint64_t grain_dir_offset;
+    uint64_t grain_dir_size;
+    uint64_t grain_tables_offset;
+    uint64_t grain_tables_size;
+    uint64_t free_bitmap_offset;
+    uint64_t free_bitmap_size;
+    uint64_t backmap_offset;
+    uint64_t backmap_size;
+    uint64_t grains_offset;
+    uint64_t grains_size;
+    uint8_t pad[304];
+} QEMU_PACKED VMDKSESparseConstHeader;
+
+typedef struct VMDKSESparseVolatileHeader {
+    uint64_t magic;
+    uint64_t free_gt_number;
+    uint64_t next_txn_seq_number;
+    uint64_t replay_journal;
+    uint8_t pad[480];
+} QEMU_PACKED VMDKSESparseVolatileHeader;
+
 #define L2_CACHE_SIZE 16
 
 typedef struct VmdkExtent {
@@ -99,19 +137,23 @@ typedef struct VmdkExtent {
     bool compressed;
     bool has_marker;
     bool has_zero_grain;
+    bool sesparse;
+    uint64_t sesparse_l2_tables_offset;
+    uint64_t sesparse_clusters_offset;
+    int32_t entry_size;
     int version;
     int64_t sectors;
     int64_t end_sector;
     int64_t flat_start_offset;
     int64_t l1_table_offset;
     int64_t l1_backup_table_offset;
-    uint32_t *l1_table;
+    void *l1_table;
     uint32_t *l1_backup_table;
     unsigned int l1_size;
     uint32_t l1_entry_sectors;
 
     unsigned int l2_size;
-    uint32_t *l2_cache;
+    void *l2_cache;
     uint32_t l2_cache_offsets[L2_CACHE_SIZE];
     uint32_t l2_cache_counts[L2_CACHE_SIZE];
 
@@ -425,11 +467,22 @@ static int vmdk_add_extent(BlockDriverState *bs,
         error_setg(errp, "Invalid granularity, image may be corrupt");
         return -EFBIG;
     }
-    if (l1_size > 512 * 1024 * 1024) {
-        /* Although with big capacity and small l1_entry_sectors, we can get a
+    if (l1_size > 32 * 1024 * 1024) {
+        /*
+         * Although with big capacity and small l1_entry_sectors, we can get a
          * big l1_size, we don't want unbounded value to allocate the table.
-         * Limit it to 512M, which is 16PB for default cluster and L2 table
-         * size */
+         * Limit it to 32M, which is enough to store:
+         *     8TB  - for both VMDK3 & VMDK4 with
+         *            minimal cluster size: 512B
+         *            minimal L2 table size: 512 entries
+         *            8 TB is still more than the maximal value supported for
+         *            VMDK3 & VMDK4 which is 2TB.
+         *     64TB - for "ESXi seSparse Extent"
+         *            minimal cluster size: 512B (default is 4KB)
+         *            L2 table size: 4096 entries (const).
+         *            64TB is more than the maximal value supported for
+         *            seSparse VMDKs (which is slightly less than 64TB)
+         */
         error_setg(errp, "L1 size too big");
         return -EFBIG;
     }
@@ -454,6 +507,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
     extent->l2_size = l2_size;
     extent->cluster_sectors = flat ? sectors : cluster_sectors;
     extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
+    extent->entry_size = sizeof(uint32_t);
 
     if (s->num_extents > 1) {
         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -475,7 +529,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
     int i;
 
     /* read the L1 table */
-    l1_size = extent->l1_size * sizeof(uint32_t);
+    l1_size = extent->l1_size * extent->entry_size;
     extent->l1_table = g_try_malloc(l1_size);
     if (l1_size && extent->l1_table == NULL) {
         return -ENOMEM;
@@ -493,10 +547,16 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
         goto fail_l1;
     }
     for (i = 0; i < extent->l1_size; i++) {
-        le32_to_cpus(&extent->l1_table[i]);
+        if (extent->entry_size == sizeof(uint64_t)) {
+            le64_to_cpus((uint64_t *)extent->l1_table + i);
+        } else {
+            assert(extent->entry_size == sizeof(uint32_t));
+            le32_to_cpus((uint32_t *)extent->l1_table + i);
+        }
     }
 
     if (extent->l1_backup_table_offset) {
+        assert(!extent->sesparse);
         extent->l1_backup_table = g_try_malloc(l1_size);
         if (l1_size && extent->l1_backup_table == NULL) {
             ret = -ENOMEM;
@@ -519,7 +579,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
     }
 
     extent->l2_cache =
-        g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
+        g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
     return 0;
  fail_l1b:
     g_free(extent->l1_backup_table);
@@ -565,6 +625,205 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
     return ret;
 }
 
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
+
+/* Strict checks - format not officially documented */
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
+                                        Error **errp)
+{
+    header->magic = le64_to_cpu(header->magic);
+    header->version = le64_to_cpu(header->version);
+    header->grain_size = le64_to_cpu(header->grain_size);
+    header->grain_table_size = le64_to_cpu(header->grain_table_size);
+    header->flags = le64_to_cpu(header->flags);
+    header->reserved1 = le64_to_cpu(header->reserved1);
+    header->reserved2 = le64_to_cpu(header->reserved2);
+    header->reserved3 = le64_to_cpu(header->reserved3);
+    header->reserved4 = le64_to_cpu(header->reserved4);
+
+    header->volatile_header_offset =
+        le64_to_cpu(header->volatile_header_offset);
+    header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
+
+    header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
+    header->journal_header_size = le64_to_cpu(header->journal_header_size);
+
+    header->journal_offset = le64_to_cpu(header->journal_offset);
+    header->journal_size = le64_to_cpu(header->journal_size);
+
+    header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
+    header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
+
+    header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
+    header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
+
+    header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
+    header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
+
+    header->backmap_offset = le64_to_cpu(header->backmap_offset);
+    header->backmap_size = le64_to_cpu(header->backmap_size);
+
+    header->grains_offset = le64_to_cpu(header->grains_offset);
+    header->grains_size = le64_to_cpu(header->grains_size);
+
+    if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
+        error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
+                   header->magic);
+        return -EINVAL;
+    }
+
+    if (header->version != 0x0000000200000001) {
+        error_setg(errp, "Unsupported version: 0x%016" PRIx64,
+                   header->version);
+        return -ENOTSUP;
+    }
+
+    if (header->grain_size != 8) {
+        error_setg(errp, "Unsupported grain size: %" PRIu64,
+                   header->grain_size);
+        return -ENOTSUP;
+    }
+
+    if (header->grain_table_size != 64) {
+        error_setg(errp, "Unsupported grain table size: %" PRIu64,
+                   header->grain_table_size);
+        return -ENOTSUP;
+    }
+
+    if (header->flags != 0) {
+        error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
+                   header->flags);
+        return -ENOTSUP;
+    }
+
+    if (header->reserved1 != 0 || header->reserved2 != 0 ||
+        header->reserved3 != 0 || header->reserved4 != 0) {
+        error_setg(errp, "Unsupported reserved bits:"
+                   " 0x%016" PRIx64 " 0x%016" PRIx64
+                   " 0x%016" PRIx64 " 0x%016" PRIx64,
+                   header->reserved1, header->reserved2,
+                   header->reserved3, header->reserved4);
+        return -ENOTSUP;
+    }
+
+    /* check that padding is 0 */
+    if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+        error_setg(errp, "Unsupported non-zero const header padding");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
+                                           Error **errp)
+{
+    header->magic = le64_to_cpu(header->magic);
+    header->free_gt_number = le64_to_cpu(header->free_gt_number);
+    header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
+    header->replay_journal = le64_to_cpu(header->replay_journal);
+
+    if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
+        error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
+                   header->magic);
+        return -EINVAL;
+    }
+
+    if (header->replay_journal) {
+        error_setg(errp, "Image is dirty, Replaying journal not supported");
+        return -ENOTSUP;
+    }
+
+    /* check that padding is 0 */
+    if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+        error_setg(errp, "Unsupported non-zero volatile header padding");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
+static int vmdk_open_se_sparse(BlockDriverState *bs,
+                               BdrvChild *file,
+                               int flags, Error **errp)
+{
+    int ret;
+    VMDKSESparseConstHeader const_header;
+    VMDKSESparseVolatileHeader volatile_header;
+    VmdkExtent *extent;
+
+    ret = bdrv_apply_auto_read_only(bs,
+            "No write support for seSparse images available", errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    assert(sizeof(const_header) == SECTOR_SIZE);
+
+    ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
+    if (ret < 0) {
+        bdrv_refresh_filename(file->bs);
+        error_setg_errno(errp, -ret,
+                         "Could not read const header from file '%s'",
+                         file->bs->filename);
+        return ret;
+    }
+
+    /* check const header */
+    ret = check_se_sparse_const_header(&const_header, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    assert(sizeof(volatile_header) == SECTOR_SIZE);
+
+    ret = bdrv_pread(file,
+                     const_header.volatile_header_offset * SECTOR_SIZE,
+                     &volatile_header, sizeof(volatile_header));
+    if (ret < 0) {
+        bdrv_refresh_filename(file->bs);
+        error_setg_errno(errp, -ret,
+                         "Could not read volatile header from file '%s'",
+                         file->bs->filename);
+        return ret;
+    }
+
+    /* check volatile header */
+    ret = check_se_sparse_volatile_header(&volatile_header, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = vmdk_add_extent(bs, file, false,
+                          const_header.capacity,
+                          const_header.grain_dir_offset * SECTOR_SIZE,
+                          0,
+                          const_header.grain_dir_size *
+                          SECTOR_SIZE / sizeof(uint64_t),
+                          const_header.grain_table_size *
+                          SECTOR_SIZE / sizeof(uint64_t),
+                          const_header.grain_size,
+                          &extent,
+                          errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    extent->sesparse = true;
+    extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
+    extent->sesparse_clusters_offset = const_header.grains_offset;
+    extent->entry_size = sizeof(uint64_t);
+
+    ret = vmdk_init_tables(bs, extent, errp);
+    if (ret) {
+        /* free extent allocated by vmdk_add_extent */
+        vmdk_free_last_extent(bs);
+    }
+
+    return ret;
+}
+
 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
                                QDict *options, Error **errp);
 
@@ -842,6 +1101,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
          * RW [size in sectors] SPARSE "file-name.vmdk"
          * RW [size in sectors] VMFS "file-name.vmdk"
          * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
+         * RW [size in sectors] SESPARSE "file-name.vmdk"
          */
         flat_offset = -1;
         matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
@@ -864,7 +1124,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
 
         if (sectors <= 0 ||
             (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
-             strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
+             strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
+             strcmp(type, "SESPARSE")) ||
             (strcmp(access, "RW"))) {
             continue;
         }
@@ -917,6 +1178,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
                 return ret;
             }
             extent = &s->extents[s->num_extents - 1];
+        } else if (!strcmp(type, "SESPARSE")) {
+            ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
+            if (ret) {
+                bdrv_unref_child(bs, extent_file);
+                return ret;
+            }
+            extent = &s->extents[s->num_extents - 1];
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
             bdrv_unref_child(bs, extent_file);
@@ -951,6 +1219,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
     if (strcmp(ct, "monolithicFlat") &&
         strcmp(ct, "vmfs") &&
         strcmp(ct, "vmfsSparse") &&
+        strcmp(ct, "seSparse") &&
         strcmp(ct, "twoGbMaxExtentSparse") &&
         strcmp(ct, "twoGbMaxExtentFlat")) {
         error_setg(errp, "Unsupported image type '%s'", ct);
@@ -1201,10 +1470,12 @@ static int get_cluster_offset(BlockDriverState *bs,
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
-    uint32_t min_count, *l2_table;
+    uint32_t min_count;
+    void *l2_table;
     bool zeroed = false;
     int64_t ret;
     int64_t cluster_sector;
+    unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
 
     if (m_data) {
         m_data->valid = 0;
@@ -1219,7 +1490,36 @@ static int get_cluster_offset(BlockDriverState *bs,
     if (l1_index >= extent->l1_size) {
         return VMDK_ERROR;
     }
-    l2_offset = extent->l1_table[l1_index];
+    if (extent->sesparse) {
+        uint64_t l2_offset_u64;
+
+        assert(extent->entry_size == sizeof(uint64_t));
+
+        l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
+        if (l2_offset_u64 == 0) {
+            l2_offset = 0;
+        } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
+            /*
+             * Top most nibble is 0x1 if grain table is allocated.
+             * strict check - top most 4 bytes must be 0x10000000 since max
+             * supported size is 64TB for disk - so no more than 64TB / 16MB
+             * grain directories which is smaller than uint32,
+             * where 16MB is the only supported default grain table coverage.
+             */
+            return VMDK_ERROR;
+        } else {
+            l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
+            l2_offset_u64 = extent->sesparse_l2_tables_offset +
+                l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
+            if (l2_offset_u64 > 0x00000000ffffffff) {
+                return VMDK_ERROR;
+            }
+            l2_offset = (unsigned int)(l2_offset_u64);
+        }
+    } else {
+        assert(extent->entry_size == sizeof(uint32_t));
+        l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
+    }
     if (!l2_offset) {
         return VMDK_UNALLOC;
     }
@@ -1231,7 +1531,7 @@ static int get_cluster_offset(BlockDriverState *bs,
                     extent->l2_cache_counts[j] >>= 1;
                 }
             }
-            l2_table = extent->l2_cache + (i * extent->l2_size);
+            l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
             goto found;
         }
     }
@@ -1244,13 +1544,13 @@ static int get_cluster_offset(BlockDriverState *bs,
             min_index = i;
         }
     }
-    l2_table = extent->l2_cache + (min_index * extent->l2_size);
+    l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
     BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
     if (bdrv_pread(extent->file,
                 (int64_t)l2_offset * 512,
                 l2_table,
-                extent->l2_size * sizeof(uint32_t)
-            ) != extent->l2_size * sizeof(uint32_t)) {
+                l2_size_bytes
+            ) != l2_size_bytes) {
         return VMDK_ERROR;
     }
 
@@ -1258,16 +1558,45 @@ static int get_cluster_offset(BlockDriverState *bs,
     extent->l2_cache_counts[min_index] = 1;
  found:
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
-    cluster_sector = le32_to_cpu(l2_table[l2_index]);
 
-    if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
-        zeroed = true;
+    if (extent->sesparse) {
+        cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
+        switch (cluster_sector & 0xf000000000000000) {
+        case 0x0000000000000000:
+            /* unallocated grain */
+            if (cluster_sector != 0) {
+                return VMDK_ERROR;
+            }
+            break;
+        case 0x1000000000000000:
+            /* scsi-unmapped grain - fallthrough */
+        case 0x2000000000000000:
+            /* zero grain */
+            zeroed = true;
+            break;
+        case 0x3000000000000000:
+            /* allocated grain */
+            cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
+                              ((cluster_sector & 0x0000ffffffffffff) << 12));
+            cluster_sector = extent->sesparse_clusters_offset +
+                cluster_sector * extent->cluster_sectors;
+            break;
+        default:
+            return VMDK_ERROR;
+        }
+    } else {
+        cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
+
+        if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
+            zeroed = true;
+        }
     }
 
     if (!cluster_sector || zeroed) {
         if (!allocate) {
             return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
         }
+        assert(!extent->sesparse);
 
         if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
             return VMDK_ERROR;
@@ -1291,7 +1620,7 @@ static int get_cluster_offset(BlockDriverState *bs,
             m_data->l1_index = l1_index;
             m_data->l2_index = l2_index;
             m_data->l2_offset = l2_offset;
-            m_data->l2_cache_entry = &l2_table[l2_index];
+            m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
         }
     }
     *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
@@ -1617,6 +1946,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
         if (!extent) {
             return -EIO;
         }
+        if (extent->sesparse) {
+            return -ENOTSUP;
+        }
         offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
         n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
                              - offset_in_cluster);
diff --git a/blockdev.c b/blockdev.c
index 5d6a13dea9..4d141e9a1f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1774,7 +1774,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
     assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
     backup = common->action->u.drive_backup.data;
 
-    bs = qmp_get_root_bs(backup->device, errp);
+    bs = bdrv_lookup_bs(backup->device, backup->device, errp);
     if (!bs) {
         return;
     }
diff --git a/configure b/configure
index b091b82cb3..5c7914570e 100755
--- a/configure
+++ b/configure
@@ -472,7 +472,7 @@ auth_pam=""
 vte=""
 virglrenderer=""
 tpm=""
-libssh2=""
+libssh=""
 live_block_migration="yes"
 numa=""
 tcmalloc="no"
@@ -1439,9 +1439,9 @@ for opt do
   ;;
   --enable-tpm) tpm="yes"
   ;;
-  --disable-libssh2) libssh2="no"
+  --disable-libssh) libssh="no"
   ;;
-  --enable-libssh2) libssh2="yes"
+  --enable-libssh) libssh="yes"
   ;;
   --disable-live-block-migration) live_block_migration="no"
   ;;
@@ -1810,7 +1810,7 @@ disabled with --disable-FEATURE, default is enabled if available:
   coroutine-pool  coroutine freelist (better performance)
   glusterfs       GlusterFS backend
   tpm             TPM support
-  libssh2         ssh block device support
+  libssh          ssh block device support
   numa            libnuma support
   libxml2         for Parallels image format
   tcmalloc        tcmalloc support
@@ -3914,43 +3914,34 @@ EOF
 fi
 
 ##########################################
-# libssh2 probe
-min_libssh2_version=1.2.8
-if test "$libssh2" != "no" ; then
-  if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
-    libssh2_cflags=$($pkg_config libssh2 --cflags)
-    libssh2_libs=$($pkg_config libssh2 --libs)
-    libssh2=yes
+# libssh probe
+if test "$libssh" != "no" ; then
+  if $pkg_config --exists libssh; then
+    libssh_cflags=$($pkg_config libssh --cflags)
+    libssh_libs=$($pkg_config libssh --libs)
+    libssh=yes
   else
-    if test "$libssh2" = "yes" ; then
-      error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
+    if test "$libssh" = "yes" ; then
+      error_exit "libssh required for --enable-libssh"
     fi
-    libssh2=no
+    libssh=no
   fi
 fi
 
 ##########################################
-# libssh2_sftp_fsync probe
+# Check for libssh 0.8
+# This is done like this instead of using the LIBSSH_VERSION_* and
+# SSH_VERSION_* macros because some distributions in the past shipped
+# snapshots of the future 0.8 from Git, and those snapshots did not
+# have updated version numbers (still referring to 0.7.0).
 
-if test "$libssh2" = "yes"; then
+if test "$libssh" = "yes"; then
   cat > $TMPC <<EOF
-#include <stdio.h>
-#include <libssh2.h>
-#include <libssh2_sftp.h>
-int main(void) {
-    LIBSSH2_SESSION *session;
-    LIBSSH2_SFTP *sftp;
-    LIBSSH2_SFTP_HANDLE *sftp_handle;
-    session = libssh2_session_init ();
-    sftp = libssh2_sftp_init (session);
-    sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
-    libssh2_sftp_fsync (sftp_handle);
-    return 0;
-}
+#include <libssh/libssh.h>
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
 EOF
-  # libssh2_cflags/libssh2_libs defined in previous test.
-  if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
-    QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
+  if compile_prog "$libssh_cflags" "$libssh_libs"; then
+    libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
   fi
 fi
 
@@ -6451,7 +6442,7 @@ echo "GlusterFS support $glusterfs"
 echo "gcov              $gcov_tool"
 echo "gcov enabled      $gcov"
 echo "TPM support       $tpm"
-echo "libssh2 support   $libssh2"
+echo "libssh support    $libssh"
 echo "QOM debugging     $qom_cast_debug"
 echo "Live block migration $live_block_migration"
 echo "lzo support       $lzo"
@@ -7144,10 +7135,10 @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
   echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
 fi
 
-if test "$libssh2" = "yes" ; then
-  echo "CONFIG_LIBSSH2=m" >> $config_host_mak
-  echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
-  echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
+if test "$libssh" = "yes" ; then
+  echo "CONFIG_LIBSSH=m" >> $config_host_mak
+  echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
+  echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
 fi
 
 if test "$live_block_migration" = "yes" ; then
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
index da06a9bc83..91ab0eceae 100644
--- a/docs/qemu-block-drivers.texi
+++ b/docs/qemu-block-drivers.texi
@@ -782,7 +782,7 @@ print a warning when @code{fsync} is not supported:
 
 warning: ssh server @code{ssh.example.com:22} does not support fsync
 
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
 supported.
 
 @node disk_images_nvme
diff --git a/hw/9pfs/xen-9pfs.h b/hw/9pfs/xen-9pfs.h
index fbdee3d843..241e2216a4 100644
--- a/hw/9pfs/xen-9pfs.h
+++ b/hw/9pfs/xen-9pfs.h
@@ -13,8 +13,8 @@
 #ifndef HW_9PFS_XEN_9PFS_H
 #define HW_9PFS_XEN_9PFS_H
 
-#include <xen/io/protocols.h>
-#include "hw/xen/io/ring.h"
+#include "hw/xen/interface/io/protocols.h"
+#include "hw/xen/interface/io/ring.h"
 
 /*
  * Do not merge into xen-9p-backend.c: clang doesn't allow unused static
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index f7ad452bbd..0f200c5fb0 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -58,6 +58,7 @@ struct XenBlockDataPlane {
     int requests_inflight;
     unsigned int max_requests;
     BlockBackend *blk;
+    unsigned int sector_size;
     QEMUBH *bh;
     IOThread *iothread;
     AioContext *ctx;
@@ -167,7 +168,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
         goto err;
     }
 
-    request->start = request->req.sector_number * XEN_BLKIF_SECTOR_SIZE;
+    request->start = request->req.sector_number * dataplane->sector_size;
     for (i = 0; i < request->req.nr_segments; i++) {
         if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
             error_report("error: nr_segments too big");
@@ -177,14 +178,14 @@ static int xen_block_parse_request(XenBlockRequest *request)
             error_report("error: first > last sector");
             goto err;
         }
-        if (request->req.seg[i].last_sect * XEN_BLKIF_SECTOR_SIZE >=
+        if (request->req.seg[i].last_sect * dataplane->sector_size >=
             XC_PAGE_SIZE) {
             error_report("error: page crossing");
             goto err;
         }
 
         len = (request->req.seg[i].last_sect -
-               request->req.seg[i].first_sect + 1) * XEN_BLKIF_SECTOR_SIZE;
+               request->req.seg[i].first_sect + 1) * dataplane->sector_size;
         request->size += len;
     }
     if (request->start + request->size > blk_getlength(dataplane->blk)) {
@@ -218,17 +219,17 @@ static int xen_block_copy_request(XenBlockRequest *request)
         if (to_domain) {
             segs[i].dest.foreign.ref = request->req.seg[i].gref;
             segs[i].dest.foreign.offset = request->req.seg[i].first_sect *
-                XEN_BLKIF_SECTOR_SIZE;
+                dataplane->sector_size;
             segs[i].source.virt = virt;
         } else {
             segs[i].source.foreign.ref = request->req.seg[i].gref;
             segs[i].source.foreign.offset = request->req.seg[i].first_sect *
-                XEN_BLKIF_SECTOR_SIZE;
+                dataplane->sector_size;
             segs[i].dest.virt = virt;
         }
         segs[i].len = (request->req.seg[i].last_sect -
                        request->req.seg[i].first_sect + 1) *
-                      XEN_BLKIF_SECTOR_SIZE;
+                      dataplane->sector_size;
         virt += segs[i].len;
     }
 
@@ -317,7 +318,9 @@ static void xen_block_complete_aio(void *opaque, int ret)
     }
     xen_block_release_request(request);
 
-    qemu_bh_schedule(dataplane->bh);
+    if (dataplane->more_work) {
+        qemu_bh_schedule(dataplane->bh);
+    }
 
 done:
     aio_context_release(dataplane->ctx);
@@ -336,12 +339,12 @@ static bool xen_block_split_discard(XenBlockRequest *request,
 
     /* Wrap around, or overflowing byte limit? */
     if (sec_start + sec_count < sec_count ||
-        sec_start + sec_count > INT64_MAX / XEN_BLKIF_SECTOR_SIZE) {
+        sec_start + sec_count > INT64_MAX / dataplane->sector_size) {
         return false;
     }
 
-    byte_offset = sec_start * XEN_BLKIF_SECTOR_SIZE;
-    byte_remaining = sec_count * XEN_BLKIF_SECTOR_SIZE;
+    byte_offset = sec_start * dataplane->sector_size;
+    byte_remaining = sec_count * dataplane->sector_size;
 
     do {
         byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ?
@@ -514,12 +517,13 @@ static int xen_block_get_request(XenBlockDataPlane *dataplane,
  */
 #define IO_PLUG_THRESHOLD 1
 
-static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
+static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
 {
     RING_IDX rc, rp;
     XenBlockRequest *request;
     int inflight_atstart = dataplane->requests_inflight;
     int batched = 0;
+    bool done_something = false;
 
     dataplane->more_work = 0;
 
@@ -551,6 +555,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
         }
         xen_block_get_request(dataplane, request, rc);
         dataplane->rings.common.req_cons = ++rc;
+        done_something = true;
 
         /* parse them */
         if (xen_block_parse_request(request) != 0) {
@@ -602,10 +607,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
         blk_io_unplug(dataplane->blk);
     }
 
-    if (dataplane->more_work &&
-        dataplane->requests_inflight < dataplane->max_requests) {
-        qemu_bh_schedule(dataplane->bh);
-    }
+    return done_something;
 }
 
 static void xen_block_dataplane_bh(void *opaque)
@@ -617,21 +619,23 @@ static void xen_block_dataplane_bh(void *opaque)
     aio_context_release(dataplane->ctx);
 }
 
-static void xen_block_dataplane_event(void *opaque)
+static bool xen_block_dataplane_event(void *opaque)
 {
     XenBlockDataPlane *dataplane = opaque;
 
-    qemu_bh_schedule(dataplane->bh);
+    return xen_block_handle_requests(dataplane);
 }
 
 XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
-                                              BlockConf *conf,
+                                              BlockBackend *blk,
+                                              unsigned int sector_size,
                                               IOThread *iothread)
 {
     XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1);
 
     dataplane->xendev = xendev;
-    dataplane->blk = conf->blk;
+    dataplane->blk = blk;
+    dataplane->sector_size = sector_size;
 
     QLIST_INIT(&dataplane->inflight);
     QLIST_INIT(&dataplane->freelist);
@@ -803,7 +807,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
     }
 
     dataplane->event_channel =
-        xen_device_bind_event_channel(xendev, event_channel,
+        xen_device_bind_event_channel(xendev, dataplane->ctx, event_channel,
                                       xen_block_dataplane_event, dataplane,
                                       &local_err);
     if (local_err) {
diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h
index d6fa6d26dd..76dcd51c3d 100644
--- a/hw/block/dataplane/xen-block.h
+++ b/hw/block/dataplane/xen-block.h
@@ -15,7 +15,8 @@
 typedef struct XenBlockDataPlane XenBlockDataPlane;
 
 XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
-                                              BlockConf *conf,
+                                              BlockBackend *blk,
+                                              unsigned int sector_size,
                                               IOThread *iothread);
 void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane);
 void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 107a719b95..36d6a8bb3a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1384,7 +1384,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     n->bar.cap = 0;
     NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
     NVME_CAP_SET_CQR(n->bar.cap, 1);
-    NVME_CAP_SET_AMS(n->bar.cap, 1);
     NVME_CAP_SET_TO(n->bar.cap, 0xf);
     NVME_CAP_SET_CSS(n->bar.cap, 1);
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 4de537aef4..8f224ef81d 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -52,11 +52,25 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
     XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
     const char *type = object_get_typename(OBJECT(blockdev));
     XenBlockVdev *vdev = &blockdev->props.vdev;
+    BlockConf *conf = &blockdev->props.conf;
+    unsigned int feature_large_sector_size;
     unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol;
     char *str;
 
     trace_xen_block_connect(type, vdev->disk, vdev->partition);
 
+    if (xen_device_frontend_scanf(xendev, "feature-large-sector-size", "%u",
+                                  &feature_large_sector_size) != 1) {
+        feature_large_sector_size = 0;
+    }
+
+    if (feature_large_sector_size != 1 &&
+        conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
+        error_setg(errp, "logical_block_size != %u not supported by frontend",
+                   XEN_BLKIF_SECTOR_SIZE);
+        return;
+    }
+
     if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
                                   &order) != 1) {
         nr_ring_ref = 1;
@@ -150,7 +164,7 @@ static void xen_block_set_size(XenBlockDevice *blockdev)
     const char *type = object_get_typename(OBJECT(blockdev));
     XenBlockVdev *vdev = &blockdev->props.vdev;
     BlockConf *conf = &blockdev->props.conf;
-    int64_t sectors = blk_getlength(conf->blk) / XEN_BLKIF_SECTOR_SIZE;
+    int64_t sectors = blk_getlength(conf->blk) / conf->logical_block_size;
     XenDevice *xendev = XEN_DEVICE(blockdev);
 
     trace_xen_block_size(type, vdev->disk, vdev->partition, sectors);
@@ -185,6 +199,7 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
     const char *type = object_get_typename(OBJECT(blockdev));
     XenBlockVdev *vdev = &blockdev->props.vdev;
     BlockConf *conf = &blockdev->props.conf;
+    BlockBackend *blk = conf->blk;
     Error *local_err = NULL;
 
     if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
@@ -206,8 +221,8 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
      * The blkif protocol does not deal with removable media, so it must
      * always be present, even for CDRom devices.
      */
-    assert(conf->blk);
-    if (!blk_is_inserted(conf->blk)) {
+    assert(blk);
+    if (!blk_is_inserted(blk)) {
         error_setg(errp, "device needs media, but drive is empty");
         return;
     }
@@ -224,26 +239,20 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
 
     blkconf_blocksizes(conf);
 
-    if (conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
-        error_setg(errp, "logical_block_size != %u not supported",
-                   XEN_BLKIF_SECTOR_SIZE);
-        return;
-    }
-
     if (conf->logical_block_size > conf->physical_block_size) {
         error_setg(
             errp, "logical_block_size > physical_block_size not supported");
         return;
     }
 
-    blk_set_dev_ops(conf->blk, &xen_block_dev_ops, blockdev);
-    blk_set_guest_block_size(conf->blk, conf->logical_block_size);
+    blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
+    blk_set_guest_block_size(blk, conf->logical_block_size);
 
     if (conf->discard_granularity == -1) {
         conf->discard_granularity = conf->physical_block_size;
     }
 
-    if (blk_get_flags(conf->blk) & BDRV_O_UNMAP) {
+    if (blk_get_flags(blk) & BDRV_O_UNMAP) {
         xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
         xen_device_backend_printf(xendev, "discard-granularity", "%u",
                                   conf->discard_granularity);
@@ -260,12 +269,13 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
                                blockdev->device_type);
 
     xen_device_backend_printf(xendev, "sector-size", "%u",
-                              XEN_BLKIF_SECTOR_SIZE);
+                              conf->logical_block_size);
 
     xen_block_set_size(blockdev);
 
     blockdev->dataplane =
-        xen_block_dataplane_create(xendev, conf, blockdev->props.iothread);
+        xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
+                                   blockdev->props.iothread);
 }
 
 static void xen_block_frontend_changed(XenDevice *xendev,
diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h
index a353693ea0..99733529c1 100644
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -1,9 +1,8 @@
 #ifndef XEN_BLKIF_H
 #define XEN_BLKIF_H
 
-#include "hw/xen/io/ring.h"
-#include <xen/io/blkif.h>
-#include <xen/io/protocols.h>
+#include "hw/xen/interface/io/blkif.h"
+#include "hw/xen/interface/io/protocols.h"
 
 /*
  * Not a real protocol.  Used to generate ring structs which contain
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 91f34ef06c..47e1092263 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -28,7 +28,7 @@
 #include "chardev/char-fe.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include <xen/io/console.h>
+#include "hw/xen/interface/io/console.h"
 
 struct buffer {
     uint8_t *data;
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 6202f1150e..3c79913b31 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -32,10 +32,9 @@
 #include "ui/console.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include <xen/event_channel.h>
-#include <xen/io/fbif.h>
-#include <xen/io/kbdif.h>
-#include <xen/io/protocols.h>
+#include "hw/xen/interface/io/fbif.h"
+#include "hw/xen/interface/io/kbdif.h"
+#include "hw/xen/interface/io/protocols.h"
 
 #include "trace.h"
 
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 2939122e7c..469f1260a4 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -27,7 +27,6 @@
 #include "exec/address-spaces.h"
 
 #include <xen/hvm/ioreq.h>
-#include <xen/hvm/params.h>
 #include <xen/hvm/e820.h>
 
 //#define DEBUG_XEN_HVM
@@ -120,6 +119,8 @@ typedef struct XenIOState {
     DeviceListener device_listener;
     hwaddr free_phys_offset;
     const XenPhysmap *log_for_dirtybit;
+    /* Buffer used by xen_sync_dirty_bitmap */
+    unsigned long *dirty_bitmap;
 
     Notifier exit;
     Notifier suspend;
@@ -465,6 +466,8 @@ static int xen_remove_from_physmap(XenIOState *state,
     QLIST_REMOVE(physmap, list);
     if (state->log_for_dirtybit == physmap) {
         state->log_for_dirtybit = NULL;
+        g_free(state->dirty_bitmap);
+        state->dirty_bitmap = NULL;
     }
     g_free(physmap);
 
@@ -615,7 +618,7 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
 {
     hwaddr npages = size >> TARGET_PAGE_BITS;
     const int width = sizeof(unsigned long) * 8;
-    unsigned long bitmap[DIV_ROUND_UP(npages, width)];
+    size_t bitmap_size = DIV_ROUND_UP(npages, width);
     int rc, i, j;
     const XenPhysmap *physmap = NULL;
 
@@ -627,13 +630,14 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
 
     if (state->log_for_dirtybit == NULL) {
         state->log_for_dirtybit = physmap;
+        state->dirty_bitmap = g_new(unsigned long, bitmap_size);
     } else if (state->log_for_dirtybit != physmap) {
         /* Only one range for dirty bitmap can be tracked. */
         return;
     }
 
     rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
-                              npages, bitmap);
+                              npages, state->dirty_bitmap);
     if (rc < 0) {
 #ifndef ENODATA
 #define ENODATA  ENOENT
@@ -647,8 +651,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
         return;
     }
 
-    for (i = 0; i < ARRAY_SIZE(bitmap); i++) {
-        unsigned long map = bitmap[i];
+    for (i = 0; i < bitmap_size; i++) {
+        unsigned long map = state->dirty_bitmap[i];
         while (map != 0) {
             j = ctzl(map);
             map &= ~(1ul << j);
@@ -678,6 +682,8 @@ static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
 
     if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
         state->log_for_dirtybit = NULL;
+        g_free(state->dirty_bitmap);
+        state->dirty_bitmap = NULL;
         /* Disable dirty bit tracking */
         xen_track_dirty_vram(xen_domid, 0, 0, NULL);
     }
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 254759f776..dc73c86c61 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -17,8 +17,6 @@
 #include "hw/xen/xen-legacy-backend.h"
 #include "qemu/bitmap.h"
 
-#include <xen/hvm/params.h>
-
 #include "sysemu/xen-mapcache.h"
 #include "trace.h"
 
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 37cda8e4be..ffb3b5898d 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -30,7 +30,7 @@
 #include "net/util.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include <xen/io/netif.h>
+#include "hw/xen/interface/io/netif.h"
 
 /* ------------------------------------------------------------- */
 
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index b20d0cfadf..dfbb418e77 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -32,8 +32,7 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 
-#include "hw/xen/io/ring.h"
-#include <xen/io/usbif.h>
+#include "hw/xen/interface/io/usbif.h"
 
 /*
  * Check for required support of usbif.h: USBIF_SHORT_NOT_OK was the last
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index a4416d0bcf..7503eea9e9 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -924,23 +924,35 @@ done:
 }
 
 struct XenEventChannel {
+    QLIST_ENTRY(XenEventChannel) list;
+    AioContext *ctx;
+    xenevtchn_handle *xeh;
     evtchn_port_t local_port;
     XenEventHandler handler;
     void *opaque;
-    Notifier notifier;
 };
 
-static void event_notify(Notifier *n, void *data)
+static bool xen_device_poll(void *opaque)
+{
+    XenEventChannel *channel = opaque;
+
+    return channel->handler(channel->opaque);
+}
+
+static void xen_device_event(void *opaque)
 {
-    XenEventChannel *channel = container_of(n, XenEventChannel, notifier);
-    unsigned long port = (unsigned long)data;
+    XenEventChannel *channel = opaque;
+    unsigned long port = xenevtchn_pending(channel->xeh);
 
     if (port == channel->local_port) {
-        channel->handler(channel->opaque);
+        xen_device_poll(channel);
+
+        xenevtchn_unmask(channel->xeh, port);
     }
 }
 
 XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+                                               AioContext *ctx,
                                                unsigned int port,
                                                XenEventHandler handler,
                                                void *opaque, Error **errp)
@@ -948,24 +960,40 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
     XenEventChannel *channel = g_new0(XenEventChannel, 1);
     xenevtchn_port_or_error_t local_port;
 
-    local_port = xenevtchn_bind_interdomain(xendev->xeh,
+    channel->xeh = xenevtchn_open(NULL, 0);
+    if (!channel->xeh) {
+        error_setg_errno(errp, errno, "failed xenevtchn_open");
+        goto fail;
+    }
+
+    local_port = xenevtchn_bind_interdomain(channel->xeh,
                                             xendev->frontend_id,
                                             port);
     if (local_port < 0) {
         error_setg_errno(errp, errno, "xenevtchn_bind_interdomain failed");
-
-        g_free(channel);
-        return NULL;
+        goto fail;
     }
 
     channel->local_port = local_port;
     channel->handler = handler;
     channel->opaque = opaque;
-    channel->notifier.notify = event_notify;
 
-    notifier_list_add(&xendev->event_notifiers, &channel->notifier);
+    channel->ctx = ctx;
+    aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+                       xen_device_event, NULL, xen_device_poll, channel);
+
+    QLIST_INSERT_HEAD(&xendev->event_channels, channel, list);
 
     return channel;
+
+fail:
+    if (channel->xeh) {
+        xenevtchn_close(channel->xeh);
+    }
+
+    g_free(channel);
+
+    return NULL;
 }
 
 void xen_device_notify_event_channel(XenDevice *xendev,
@@ -977,7 +1005,7 @@ void xen_device_notify_event_channel(XenDevice *xendev,
         return;
     }
 
-    if (xenevtchn_notify(xendev->xeh, channel->local_port) < 0) {
+    if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) {
         error_setg_errno(errp, errno, "xenevtchn_notify failed");
     }
 }
@@ -991,12 +1019,16 @@ void xen_device_unbind_event_channel(XenDevice *xendev,
         return;
     }
 
-    notifier_remove(&channel->notifier);
+    QLIST_REMOVE(channel, list);
+
+    aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+                       NULL, NULL, NULL, NULL);
 
-    if (xenevtchn_unbind(xendev->xeh, channel->local_port) < 0) {
+    if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) {
         error_setg_errno(errp, errno, "xenevtchn_unbind failed");
     }
 
+    xenevtchn_close(channel->xeh);
     g_free(channel);
 }
 
@@ -1005,6 +1037,7 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp)
     XenDevice *xendev = XEN_DEVICE(dev);
     XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
     const char *type = object_get_typename(OBJECT(xendev));
+    XenEventChannel *channel, *next;
 
     if (!xendev->name) {
         return;
@@ -1021,15 +1054,14 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp)
         xendev_class->unrealize(xendev, errp);
     }
 
+    /* Make sure all event channels are cleaned up */
+    QLIST_FOREACH_SAFE(channel, &xendev->event_channels, list, next) {
+        xen_device_unbind_event_channel(xendev, channel, NULL);
+    }
+
     xen_device_frontend_destroy(xendev);
     xen_device_backend_destroy(xendev);
 
-    if (xendev->xeh) {
-        qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), NULL, NULL, NULL);
-        xenevtchn_close(xendev->xeh);
-        xendev->xeh = NULL;
-    }
-
     if (xendev->xgth) {
         xengnttab_close(xendev->xgth);
         xendev->xgth = NULL;
@@ -1046,16 +1078,6 @@ static void xen_device_exit(Notifier *n, void *data)
     xen_device_unrealize(DEVICE(xendev), &error_abort);
 }
 
-static void xen_device_event(void *opaque)
-{
-    XenDevice *xendev = opaque;
-    unsigned long port = xenevtchn_pending(xendev->xeh);
-
-    notifier_list_notify(&xendev->event_notifiers, (void *)port);
-
-    xenevtchn_unmask(xendev->xeh, port);
-}
-
 static void xen_device_realize(DeviceState *dev, Error **errp)
 {
     XenDevice *xendev = XEN_DEVICE(dev);
@@ -1096,16 +1118,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
     xendev->feature_grant_copy =
         (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
 
-    xendev->xeh = xenevtchn_open(NULL, 0);
-    if (!xendev->xeh) {
-        error_setg_errno(errp, errno, "failed xenevtchn_open");
-        goto unrealize;
-    }
-
-    notifier_list_init(&xendev->event_notifiers);
-    qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), xen_device_event, NULL,
-                        xendev);
-
     xen_device_backend_create(xendev, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 36fd1e9b09..3715c94fa6 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -34,8 +34,6 @@
 #include "hw/xen/xen_pvdev.h"
 #include "monitor/qdev.h"
 
-#include <xen/grant_table.h>
-
 DeviceState *xen_sysdev;
 BusState *xen_sysbus;
 
diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h
new file mode 100644
index 0000000000..2af0cbdde3
--- /dev/null
+++ b/include/hw/xen/interface/grant_table.h
@@ -0,0 +1,36 @@
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h
new file mode 100644
index 0000000000..8b1be50ce8
--- /dev/null
+++ b/include/hw/xen/interface/io/blkif.h
@@ -0,0 +1,712 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters.  This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal.  Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free formatted string providing sufficient information for the
+ *      hotplug script to attach the device and provide a suitable
+ *      handler (ie: a block device) for blkback to use.
+ *
+ * physical-device
+ *      Values:         "MAJOR:MINOR"
+ *      Notes: 11
+ *
+ *      MAJOR and MINOR are the major number and minor number of the
+ *      backing device respectively.
+ *
+ * physical-device-path
+ *      Values:         path string
+ *
+ *      A string that contains the absolute path to the disk image. On
+ *      NetBSD and Linux this is always a block device, while on FreeBSD
+ *      it can be either a block device or a regular file.
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug.  See:
+ *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug.  This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant.  A backend which knows that it is not
+ *      affected by the bug can ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode.  Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using a LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      lb(machine pages). (e.g. 0 == 1 page,  1 = 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ *      Values:         0/1 (boolean)
+ *      Default Value:  1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer (or not to offer) discard to the frontend. If the property
+ *      is missing the backend should offer discard if the backing storage
+ *      actually supports it.
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          10
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device.  The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical block size, in bytes, of the underlying storage. This
+ *      must be a power of two with a minimum value of 512.
+ *
+ *      NOTE: Because of implementation bugs in some frontends this must be
+ *            set to 512, unless the frontend advertizes a non-zero value
+ *            in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *
+ *      The physical block size, in bytes, of the backend storage. This
+ *      must be an integer multiple of "sector-size".
+ *
+ * sectors
+ *      Values:         <uint64_t>
+ *
+ *      The size of the backend device, expressed in units of "sector-size".
+ *      The product of "sector-size" and "sectors" must also be an integer
+ *      multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u".  Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages.  The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fallback
+ *      to non persistent mode. This will cause a performance degradation,
+ *      since the the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the frontend will correctly supply and
+ *      interpret all sector-based quantities in terms of the "sector-size"
+ *      value supplied in the backend info, whatever that may be set to.
+ *      If this node is not present or its value is "0" then it is assumed
+ *      that the frontend requires that the logical block size is 512 as it
+ *      is hardcoded (which is the case in some frontend implementations).
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *     PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *     including a distribution deployed on certain nodes of the Amazon
+ *     EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
+ *     For full interoperability, block front and backends should publish
+ *     identical ring parameters, adjusted for unit differences, to the
+ *     XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ *     (discardable extents) in units that are larger than the exported logical
+ *     block size. If the backing device has such discardable extents the
+ *     backend should provide both discard-granularity and discard-alignment.
+ *     Providing just one of the two may be considered an error by the frontend.
+ *     Backends supporting discard should include discard-granularity and
+ *     discard-alignment even if it supports discarding individual sectors.
+ *     Frontends should assume discard-alignment == 0 and discard-granularity
+ *     == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ *     partitioned into virtual devices that do not necessarily begin or
+ *     end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ *     'ring-ref' is used to communicate the grant reference for this
+ *     page to the backend.  When using a multi-page ring, the 'ring-ref'
+ *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of doing this copy
+ *     however doesn't suppress the speed improvement of not having to unmap
+ *     the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) Linux implementation doesn't have a limit on the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontent driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ *     backing device supports secure discard.
+ *(11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * feature multi-page ring buffer.
+ * For example, a frontend requests two queues/rings and the size of each ring
+ * buffer is two pages must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ *                                   Startup                                 *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front                                Back
+ * =================================    =====================================
+ * XenbusStateInitialising              XenbusStateInitialising
+ *  o Query virtual device               o Query backend device identification
+ *    properties.                          data.
+ *  o Setup OS device instance.          o Open and validate backend device.
+ *                                       o Publish backend features and
+ *                                         transport parameters.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateInitWait
+ *
+ * o Query backend features and
+ *   transport parameters.
+ * o Allocate and initialize the
+ *   request ring.
+ * o Publish transport parameters
+ *   that will be in effect during
+ *   this connection.
+ *              |
+ *              |
+ *              V
+ * XenbusStateInitialised
+ *
+ *                                       o Query frontend transport parameters.
+ *                                       o Connect to the request ring and
+ *                                         event channel.
+ *                                       o Publish backend device properties.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateConnected
+ *
+ *  o Query backend device properties.
+ *  o Finalize OS virtual device
+ *    instance.
+ *              |
+ *              |
+ *              V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ *       of transport parameters, can skip certain states in the state machine:
+ *
+ *       o A frontend may transition to XenbusStateInitialised without
+ *         waiting for the backend to enter XenbusStateInitWait.  In this
+ *         case, default transport parameters are in effect and any
+ *         transport parameters published by the frontend must contain
+ *         their default values.
+ *
+ *       o A backend may transition to XenbusStateInitialised, bypassing
+ *         XenbusStateInitWait, without waiting for the frontend to first
+ *         enter the XenbusStateInitialised state.  In this case, default
+ *         transport parameters are in effect and any transport parameters
+ *         published by the backend must contain their default values.
+ *
+ *       Drivers that support optional features and/or transport parameter
+ *       negotiation must tolerate these additional state transition paths.
+ *       In general this means performing the work of any skipped state
+ *       transition, if it has not already been performed, in addition to the
+ *       work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ              0
+#define BLKIF_OP_WRITE             1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request.  All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional.  See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional.  See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1        4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client.  If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unamp (SCSI),
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional.  See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD           5
+
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities. See the description
+ * of the "feature-large-sector-size" frontend xenbus node above for
+ * more information.
+ */
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
+    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
+#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
+    blkif_vdev_t   handle;       /* same as for read/write requests      */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk             */
+    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
+};
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_request_indirect {
+    uint8_t        operation;    /* BLKIF_OP_INDIRECT                    */
+    uint8_t        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
+    uint16_t       nr_segments;  /* number of segments                   */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    blkif_vdev_t   handle;       /* same as for read/write requests      */
+    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+    uint64_t       pad;          /* Make it 64 byte aligned on i386      */
+#endif
+};
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+
+struct blkif_response {
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP  -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR       -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY         0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM        0x1
+#define VDISK_REMOVABLE    0x2
+#define VDISK_READONLY     0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h
new file mode 100644
index 0000000000..e2155d1cf5
--- /dev/null
+++ b/include/hw/xen/interface/io/console.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+    char in[1024];
+    char out[2048];
+    XENCONS_RING_IDX in_cons, in_prod;
+    XENCONS_RING_IDX out_cons, out_prod;
+};
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h
new file mode 100644
index 0000000000..ea87ebec0a
--- /dev/null
+++ b/include/hw/xen/interface/io/fbif.h
@@ -0,0 +1,156 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend their preferred period of
+ * refresh.  Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it.  Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodical refreshs.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+     * 64 bit.  256 directories give enough room for a 512 Meg
+     * framebuffer with a max resolution of 12,800x10,240.  Should
+     * be enough for a while with room leftover for expansion.
+     */
+    unsigned long pd[256];
+};
+
+#endif
diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h
new file mode 100644
index 0000000000..1d68cd458e
--- /dev/null
+++ b/include/hw/xen/interface/io/kbdif.h
@@ -0,0 +1,566 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/*
+ *****************************************************************************
+ *                     Feature and Parameter Negotiation
+ *****************************************************************************
+ *
+ * The two halves of a para-virtual driver utilize nodes within
+ * XenStore to communicate capabilities and to negotiate operating parameters.
+ * This section enumerates these nodes which reside in the respective front and
+ * backend portions of XenStore, following XenBus convention.
+ *
+ * All data in XenStore is stored as strings.  Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *---------------------------- Features supported ----------------------------
+ *
+ * Capable backend advertises supported features by publishing
+ * corresponding entries in XenStore and puts 1 as the value of the entry.
+ * If a feature is not supported then 0 must be set or feature entry omitted.
+ *
+ * feature-disable-keyboard
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual keyboard device by the
+ *      frontend then this must be set to 1.
+ *
+ * feature-disable-pointer
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual pointer device by the
+ *      frontend then this must be set to 1.
+ *
+ * feature-abs-pointer
+ *      Values:         <uint>
+ *
+ *      Backends, which support reporting of absolute coordinates for pointer
+ *      device should set this to 1.
+ *
+ * feature-multi-touch
+ *      Values:         <uint>
+ *
+ *      Backends, which support reporting of multi-touch events
+ *      should set this to 1.
+ *
+ * feature-raw-pointer
+ *      Values:        <uint>
+ *
+ *      Backends, which support reporting raw (unscaled) absolute coordinates
+ *      for pointer devices should set this to 1. Raw (unscaled) values have
+ *      a range of [0, 0x7fff].
+ *
+ *-----------------------  Device Instance Parameters ------------------------
+ *
+ * unique-id
+ *      Values:         <string>
+ *
+ *      After device instance initialization it is assigned a unique ID,
+ *      so every instance of the frontend can be identified by the backend
+ *      by this ID. This can be UUID or such.
+ *
+ *------------------------- Pointer Device Parameters ------------------------
+ *
+ * width
+ *      Values:         <uint>
+ *
+ *      Maximum X coordinate (width) to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * height
+ *      Values:         <uint>
+ *
+ *      Maximum Y coordinate (height) to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *----------------------- Multi-touch Device Parameters ----------------------
+ *
+ * multi-touch-num-contacts
+ *      Values:         <uint>
+ *
+ *      Number of simultaneous touches reported.
+ *
+ * multi-touch-width
+ *      Values:         <uint>
+ *
+ *      Width of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * multi-touch-height
+ *      Values:         <uint>
+ *
+ *      Height of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------------------ Feature request -----------------------------
+ *
+ * Capable frontend requests features from backend via setting corresponding
+ * entries to 1 in XenStore. Requests for features not advertised as supported
+ * by the backend have no effect.
+ *
+ * request-abs-pointer
+ *      Values:         <uint>
+ *
+ *      Request backend to report absolute pointer coordinates
+ *      (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
+ *
+ * request-multi-touch
+ *      Values:         <uint>
+ *
+ *      Request backend to report multi-touch events.
+ *
+ * request-raw-pointer
+ *      Values:         <uint>
+ *
+ *      Request backend to report raw unscaled absolute pointer coordinates.
+ *      This option is only valid if request-abs-pointer is also set.
+ *      Raw unscaled coordinates have the range [0, 0x7fff]
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * page-gref
+ *      Values:         <uint>
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      a sole page in a single page sized event ring buffer.
+ *
+ * page-ref
+ *      Values:         <uint>
+ *
+ *      OBSOLETE, not recommended for use.
+ *      PFN of the shared page.
+ */
+
+/*
+ * EVENT CODES.
+ */
+
+#define XENKBD_TYPE_MOTION             1
+#define XENKBD_TYPE_RESERVED           2
+#define XENKBD_TYPE_KEY                3
+#define XENKBD_TYPE_POS                4
+#define XENKBD_TYPE_MTOUCH             5
+
+/* Multi-touch event sub-codes */
+
+#define XENKBD_MT_EV_DOWN              0
+#define XENKBD_MT_EV_UP                1
+#define XENKBD_MT_EV_MOTION            2
+#define XENKBD_MT_EV_SYN               3
+#define XENKBD_MT_EV_SHAPE             4
+#define XENKBD_MT_EV_ORIENT            5
+
+/*
+ * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
+ */
+
+#define XENKBD_DRIVER_NAME             "vkbd"
+
+#define XENKBD_FIELD_FEAT_DSBL_KEYBRD  "feature-disable-keyboard"
+#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer"
+#define XENKBD_FIELD_FEAT_ABS_POINTER  "feature-abs-pointer"
+#define XENKBD_FIELD_FEAT_RAW_POINTER  "feature-raw-pointer"
+#define XENKBD_FIELD_FEAT_MTOUCH       "feature-multi-touch"
+#define XENKBD_FIELD_REQ_ABS_POINTER   "request-abs-pointer"
+#define XENKBD_FIELD_REQ_RAW_POINTER   "request-raw-pointer"
+#define XENKBD_FIELD_REQ_MTOUCH        "request-multi-touch"
+#define XENKBD_FIELD_RING_GREF         "page-gref"
+#define XENKBD_FIELD_EVT_CHANNEL       "event-channel"
+#define XENKBD_FIELD_WIDTH             "width"
+#define XENKBD_FIELD_HEIGHT            "height"
+#define XENKBD_FIELD_MT_WIDTH          "multi-touch-width"
+#define XENKBD_FIELD_MT_HEIGHT         "multi-touch-height"
+#define XENKBD_FIELD_MT_NUM_CONTACTS   "multi-touch-num-contacts"
+#define XENKBD_FIELD_UNIQUE_ID         "unique-id"
+
+/* OBSOLETE, not recommended for use */
+#define XENKBD_FIELD_RING_REF          "page-ref"
+
+/*
+ *****************************************************************************
+ * Description of the protocol between frontend and backend driver.
+ *****************************************************************************
+ *
+ * The two halves of a Para-virtual driver communicate with
+ * each other using a shared page and an event channel.
+ * Shared page contains a ring with event structures.
+ *
+ * All reserved fields in the structures below must be 0.
+ *
+ *****************************************************************************
+ *                           Backend to frontend events
+ *****************************************************************************
+ *
+ * Frontends should ignore unknown in events.
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ *
+ *          0         octet
+ * +-----------------+
+ * |       type      |
+ * +-----------------+
+ * type - uint8_t, event code, XENKBD_TYPE_???
+ *
+ *
+ * Pointer relative movement event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MOTION  |                     reserved                     | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_x                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_y                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_z                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * rel_x - int32_t, relative X motion
+ * rel_y - int32_t, relative Y motion
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_motion
+{
+    uint8_t type;
+    int32_t rel_x;
+    int32_t rel_y;
+    int32_t rel_z;
+};
+
+/*
+ * Key event (includes pointer buttons)
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_KEY     |     pressed    |            reserved             | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                              keycode                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 12
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * pressed - uint8_t, 1 if pressed; 0 otherwise
+ * keycode - uint32_t, KEY_* from linux/input.h
+ */
+
+struct xenkbd_key
+{
+    uint8_t type;
+    uint8_t pressed;
+    uint32_t keycode;
+};
+
+/*
+ * Pointer absolute position event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_POS     |                     reserved                     | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               rel_z                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position (in FB pixels)
+ * abs_y - int32_t, absolute Y position (in FB pixels)
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_position
+{
+    uint8_t type;
+    int32_t abs_x;
+    int32_t abs_y;
+    int32_t rel_z;
+};
+
+/*
+ * Multi-touch event and its sub-types
+ *
+ * All multi-touch event packets have common header:
+ *
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |   event_type   |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_???
+ * contact_id - unt8_t, ID of the contact
+ *
+ * Touch interactions can consist of one or more contacts.
+ * For each contact, a series of events is generated, starting
+ * with a down event, followed by zero or more motion events,
+ * and ending with an up event. Events relating to the same
+ * contact point can be identified by the ID of the sequence: contact ID.
+ * Contact ID may be reused after XENKBD_MT_EV_UP event and
+ * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range.
+ *
+ * For further information please refer to documentation on Wayland [1],
+ * Linux [2] and Windows [3] multi-touch support.
+ *
+ * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
+ * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
+ * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
+ *
+ *
+ * Multi-touch down event - sent when a new touch is made: touch is assigned
+ * a unique contact ID, sent with this and consequent events related
+ * to this touch.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |   _MT_EV_DOWN  |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels
+ * abs_y - int32_t, absolute Y position, in pixels
+ *
+ * Multi-touch contact release event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_UP     |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch motion event
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_MOTION |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_x                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               abs_y                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels,
+ * abs_y - int32_t, absolute Y position, in pixels,
+ *
+ * Multi-touch input synchronization event - shows end of a set of events
+ * which logically belong together.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_SYN    |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch shape event - touch point's shape has changed its shape.
+ * Shape is approximated by an ellipse through the major and minor axis
+ * lengths: major is the longer diameter of the ellipse and minor is the
+ * shorter one. Center of the ellipse is reported via
+ * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_SHAPE  |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |                               major                               | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                               minor                               | 16
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * major - unt32_t, length of the major axis, pixels
+ * minor - unt32_t, length of the minor axis, pixels
+ *
+ * Multi-touch orientation event - touch point's shape has changed
+ * its orientation: calculated as a clockwise angle between the major axis
+ * of the ellipse and positive Y axis in degrees, [-180; +180].
+ *         0                1                 2               3        octet
+ * +----------------+----------------+----------------+----------------+
+ * |  _TYPE_MTOUCH  |  _MT_EV_ORIENT |   contact_id   |    reserved    | 4
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 8
+ * +----------------+----------------+----------------+----------------+
+ * |           orientation           |            reserved             | 12
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 16
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * |                             reserved                              | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * orientation - int16_t, clockwise angle of the major axis
+ */
+
+struct xenkbd_mtouch {
+    uint8_t type;            /* XENKBD_TYPE_MTOUCH */
+    uint8_t event_type;      /* XENKBD_MT_EV_??? */
+    uint8_t contact_id;
+    uint8_t reserved[5];     /* reserved for the future use */
+    union {
+        struct {
+            int32_t abs_x;   /* absolute X position, pixels */
+            int32_t abs_y;   /* absolute Y position, pixels */
+        } pos;
+        struct {
+            uint32_t major;  /* length of the major axis, pixels */
+            uint32_t minor;  /* length of the minor axis, pixels */
+        } shape;
+        int16_t orientation; /* clockwise angle of the major axis */
+    } u;
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    struct xenkbd_mtouch mtouch;
+    char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ *                            Frontend to backend events
+ *****************************************************************************
+ *
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ *          0         octet
+ * +-----------------+
+ * |       type      |
+ * +-----------------+
+ * type - uint8_t, event code
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ *                            Shared page
+ *****************************************************************************
+ */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+};
+
+#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h
new file mode 100644
index 0000000000..48fa530950
--- /dev/null
+++ b/include/hw/xen/interface/io/netif.h
@@ -0,0 +1,1010 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * "feature-split-event-channels" is introduced to separate guest TX
+ * and RX notification. Backend either doesn't support this feature or
+ * advertises it via xenstore as 0 (disabled) or 1 (enabled).
+ *
+ * To make use of this feature, frontend should allocate two event
+ * channels for TX and RX, advertise them to backend as
+ * "event-channel-tx" and "event-channel-rx" respectively. If frontend
+ * doesn't want to use this feature, it just writes "event-channel"
+ * node as before.
+ */
+
+/*
+ * Multiple transmit and receive queues:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vif, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues", set to the number they wish to use, which
+ * must be greater than zero, and no more than the value reported by the backend
+ * in "multi-queue-max-queues".
+ *
+ * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using
+ * multiple queues, but is not mandatory.
+ *
+ * Each queue consists of one shared ring pair, i.e. there must be the same
+ * number of tx and rx rings.
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
+ * instead writing those keys under sub-keys having the name "queue-N" where
+ * N is the integer ID of the queue for which those keys belong. Queues
+ * are indexed from zero. For example, a frontend with two queues and split
+ * event channels must write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
+ *
+ * If there is any inconsistency in the XenStore data, the backend may
+ * choose not to connect any queues, instead treating the request as an
+ * error. This includes scenarios where more (or fewer) queues were
+ * requested than the frontend provided details for.
+ *
+ * Mapping of packets to queues is considered to be a function of the
+ * transmitting system (backend or frontend) and is not negotiated
+ * between the two. Guests are free to transmit packets on any queue
+ * they choose, provided it has been set up correctly. Guests must be
+ * prepared to receive packets on any queue they have requested be set up.
+ */
+
+/*
+ * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
+ * offload off or on. If it is missing then the feature is assumed to be on.
+ * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
+ * offload on or off. If it is missing then the feature is assumed to be off.
+ */
+
+/*
+ * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
+ * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
+ * frontends nor backends are assumed to be capable unless the flags are
+ * present.
+ */
+
+/*
+ * "feature-multicast-control" and "feature-dynamic-multicast-control"
+ * advertise the capability to filter ethernet multicast packets in the
+ * backend. If the frontend wishes to take advantage of this feature then
+ * it may set "request-multicast-control". If the backend only advertises
+ * "feature-multicast-control" then "request-multicast-control" must be set
+ * before the frontend moves into the connected state. The backend will
+ * sample the value on this state transition and any subsequent change in
+ * value will have no effect. However, if the backend also advertises
+ * "feature-dynamic-multicast-control" then "request-multicast-control"
+ * may be set by the frontend at any time. In this case, the backend will
+ * watch the value and re-sample on watch events.
+ *
+ * If the sampled value of "request-multicast-control" is set then the
+ * backend transmit side should no longer flood multicast packets to the
+ * frontend, it should instead drop any multicast packet that does not
+ * match in a filter list.
+ * The list is amended by the frontend by sending dummy transmit requests
+ * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
+ * specified below.
+ * Note that the filter list may be amended even if the sampled value of
+ * "request-multicast-control" is not set, however the filter should only
+ * be applied if it is set.
+ */
+
+/*
+ * Control ring
+ * ============
+ *
+ * Some features, such as hashing (detailed below), require a
+ * significant amount of out-of-band data to be passed from frontend to
+ * backend. Use of xenstore is not suitable for large quantities of data
+ * because of quota limitations and so a dedicated 'control ring' is used.
+ * The ability of the backend to use a control ring is advertised by
+ * setting:
+ *
+ * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1"
+ *
+ * The frontend provides a control ring to the backend by setting:
+ *
+ * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
+ * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
+ *
+ * where <gref> is the grant reference of the shared page used to
+ * implement the control ring and <port> is an event channel to be used
+ * as a mailbox interrupt. These keys must be set before the frontend
+ * moves into the connected state.
+ *
+ * The control ring uses a fixed request/response message size and is
+ * balanced (i.e. one request to one response), so operationally it is much
+ * the same as a transmit or receive ring.
+ * Note that there is no requirement that responses are issued in the same
+ * order as requests.
+ */
+
+/*
+ * Hash types
+ * ==========
+ *
+ * For the purposes of the definitions below, 'Packet[]' is an array of
+ * octets containing an IP packet without options, 'Array[X..Y]' means a
+ * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
+ * used to indicate concatenation of arrays.
+ */
+
+/*
+ * A hash calculated over an IP version 4 header as follows:
+ *
+ * Buffer[0..8] = Packet[12..15] (source address) +
+ *                Packet[16..19] (destination address)
+ *
+ * Result = Hash(Buffer, 8)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
+
+/*
+ * A hash calculated over an IP version 4 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..12] = Packet[12..15] (source address) +
+ *                 Packet[16..19] (destination address) +
+ *                 Packet[20..21] (source port) +
+ *                 Packet[22..23] (destination port)
+ *
+ * Result = Hash(Buffer, 12)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
+
+/*
+ * A hash calculated over an IP version 6 header as follows:
+ *
+ * Buffer[0..32] = Packet[8..23]  (source address ) +
+ *                 Packet[24..39] (destination address)
+ *
+ * Result = Hash(Buffer, 32)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
+
+/*
+ * A hash calculated over an IP version 6 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..36] = Packet[8..23]  (source address) +
+ *                 Packet[24..39] (destination address) +
+ *                 Packet[40..41] (source port) +
+ *                 Packet[42..43] (destination port)
+ *
+ * Result = Hash(Buffer, 36)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
+    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
+
+/*
+ * Hash algorithms
+ * ===============
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0
+
+/*
+ * Toeplitz hash:
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
+
+/*
+ * Control requests (struct xen_netif_ctrl_request)
+ * ================================================
+ *
+ * All requests have the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |    id     |   type    |         data[0]       |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |         data[1]       |         data[2]       |
+ * +-----+-----+-----+-----+-----------------------+
+ *
+ * id: the request identifier, echoed in response.
+ * type: the type of request (see below)
+ * data[]: any data associated with the request (determined by type)
+ */
+
+struct xen_netif_ctrl_request {
+    uint16_t id;
+    uint16_t type;
+
+#define XEN_NETIF_CTRL_TYPE_INVALID               0
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS        1
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS        2
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY          3
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING      6
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM    7
+#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8
+#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING      9
+#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING     10
+
+    uint32_t data[3];
+};
+
+/*
+ * Control responses (struct xen_netif_ctrl_response)
+ * ==================================================
+ *
+ * All responses have the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |    id     |   type    |         status        |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |         data          |
+ * +-----+-----+-----+-----+
+ *
+ * id: the corresponding request identifier
+ * type: the type of the corresponding request
+ * status: the status of request processing
+ * data: any data associated with the response (determined by type and
+ *       status)
+ */
+
+struct xen_netif_ctrl_response {
+    uint16_t id;
+    uint16_t type;
+    uint32_t status;
+
+#define XEN_NETIF_CTRL_STATUS_SUCCESS           0
+#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     1
+#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
+#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   3
+
+    uint32_t data;
+};
+
+/*
+ * Static Grants (struct xen_netif_gref)
+ * =====================================
+ *
+ * A frontend may provide a fixed set of grant references to be mapped on
+ * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * prior its usage in the command ring allows for creation of these mappings.
+ * The backend will maintain a fixed amount of these mappings.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many
+ * of these mappings can be kept.
+ *
+ * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has
+ * the following format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref             |  flags    |  status   |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * grant ref: grant reference (IN)
+ * flags: flags describing the control operation (IN)
+ * status: XEN_NETIF_CTRL_STATUS_* (OUT)
+ *
+ * 'status' is an output parameter which does not require to be set to zero
+ * prior to its usage in the corresponding control messages.
+ */
+
+struct xen_netif_gref {
+       grant_ref_t ref;
+       uint16_t flags;
+
+#define _XEN_NETIF_CTRLF_GREF_readonly    0
+#define XEN_NETIF_CTRLF_GREF_readonly    (1U<<_XEN_NETIF_CTRLF_GREF_readonly)
+
+       uint16_t status;
+};
+
+/*
+ * Control messages
+ * ================
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ * --------------------------------------
+ *
+ * This is sent by the frontend to set the desired hash algorithm.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ *  data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *
+ * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
+ *       hashing and the backend is free to choose how it steers packets
+ *       to queues (which is the default behaviour).
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to query the types of hash supported by
+ * the backend.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ *  data[0] = 0
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
+ *  data   = supported hash types (if operation was successful)
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ *       succeed.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to set the types of hash that the backend
+ * should calculate. (See above for hash type definitions).
+ * Note that the 'maximal' type of hash should always be chosen. For
+ * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
+ * the latter hash type should be calculated for any TCP packet and the
+ * former only calculated for non-TCP packets.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ *  data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
+ *                                                     value is invalid or
+ *                                                     unsupported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ *       succeed.
+ *       Also, setting data[0] to zero disables hashing and the backend
+ *       is free to choose how it steers packets to queues.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ * --------------------------------
+ *
+ * This is sent by the frontend to set the key of the hash if the algorithm
+ * requires it. (See hash algorithms above).
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ *  data[0] = grant reference of page containing the key (assumed to
+ *            start at beginning of grant)
+ *  data[1] = size of key in octets
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
+ *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Key size is larger
+ *                                                     than the backend
+ *                                                     supports
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: Any key octets not specified are assumed to be zero (the key
+ *       is assumed to be empty by default) and specifying a new key
+ *       invalidates any previous key, hence specifying a key size of
+ *       zero will clear the key (which ensures that the calculated hash
+ *       will always be zero).
+ *       The maximum size of key is algorithm and backend specific, but
+ *       is also limited by the single grant reference.
+ *       The grant reference may be read-only and must remain valid until
+ *       the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to query the maximum size of mapping
+ * table supported by the backend. The size is specified in terms of
+ * table entries.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ *  data[0] = 0
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
+ *  data   = maximum number of entries allowed in the mapping table
+ *           (if operation was successful) or zero if a mapping table is
+ *           not supported (i.e. hash mapping is done only by modular
+ *           arithmetic).
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ * -------------------------------------
+ *
+ * This is sent by the frontend to set the actual size of the mapping
+ * table to be used by the backend. The size is specified in terms of
+ * table entries.
+ * Any previous table is invalidated by this message and any new table
+ * is assumed to be zero filled.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ *  data[0] = number of entries in mapping table
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: Setting data[0] to 0 means that hash mapping should be done
+ *       using modular arithmetic.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend to set the content of the table mapping
+ * hash value to queue number. The backend should calculate the hash from
+ * the packet header, use it as an index into the table (modulo the size
+ * of the table) and then steer the packet to the queue number found at
+ * that index.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ *  data[0] = grant reference of page containing the mapping (sub-)table
+ *            (assumed to start at beginning of grant)
+ *  data[1] = size of (sub-)table in entries
+ *  data[2] = offset, in entries, of sub-table within overall table
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
+ *                                                     is invalid
+ *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Table size is larger
+ *                                                     than the backend
+ *                                                     supports
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = 0
+ *
+ * NOTE: The overall table has the following format:
+ *
+ *          0     1     2     3     4     5     6     7  octet
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |       mapping[0]      |       mapping[1]      |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |                       .                       |
+ *       |                       .                       |
+ *       |                       .                       |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *       |      mapping[N-2]     |      mapping[N-1]     |
+ *       +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ *       where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ *       message and each  mapping must specifies a queue between 0 and
+ *       "multi-queue-num-queues" (see above).
+ *       The backend may support a mapping table larger than can be
+ *       mapped by a single grant reference. Thus sub-tables within a
+ *       larger table can be individually set by sending multiple messages
+ *       with differing offset values. Specifying a new sub-table does not
+ *       invalidate any table data outside that range.
+ *       The grant reference may be read-only and must remain valid until
+ *       the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to fetch the number of grefs that can be kept
+ * mapped in the backend.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ *  data[0] = queue index (assumed 0 for single queue)
+ *  data[1] = 0
+ *  data[2] = 0
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is
+ *                                                     out of range
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = maximum number of entries allowed in the gref mapping table
+ *           (if operation was successful) or zero if it is not supported.
+ *
+ * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to map a list of grant
+ * references.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ *  data[0] = queue index
+ *  data[1] = grant reference of page containing the mapping list
+ *            (r/w and assumed to start at beginning of page)
+ *  data[2] = size of list in entries
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *
+ * NOTE: Each entry in the input table has the format outlined
+ *       in struct xen_netif_gref.
+ *       Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct
+ *       xen_netif_gref 'status' field is not used and therefore the response
+ *       'status' determines the success of this operation. In case of
+ *       failure none of grants mappings get added in the backend.
+ *
+ * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to unmap a list of grant
+ * references.
+ *
+ * Request:
+ *
+ *  type    = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ *  data[0] = queue index
+ *  data[1] = grant reference of page containing the mapping list
+ *            (r/w and assumed to start at beginning of page)
+ *  data[2] = size of list in entries
+ *
+ * Response:
+ *
+ *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
+ *                                                     supported
+ *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
+ *  data   = number of entries that were unmapped
+ *
+ * NOTE: Each entry in the input table has the format outlined in struct
+ *       xen_netif_gref.
+ *       The struct xen_netif_gref 'status' field determines if the entry
+ *       was successfully removed.
+ *       The entries used are only the ones representing grant references that
+ *       were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ *       operation. Any other entries will have their status set to
+ *       XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion.
+ */
+
+DEFINE_RING_TYPES(xen_netif_ctrl,
+                  struct xen_netif_ctrl_request,
+                  struct xen_netif_ctrl_response);
+
+/*
+ * Guest transmit
+ * ==============
+ *
+ * This is the 'wire' format for transmit (frontend -> backend) packets:
+ *
+ *  Fragment 1: netif_tx_request_t  - flags = NETTXF_*
+ *                                    size = total packet size
+ * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
+ *                                     NETTXF_extra_info)
+ *  ...
+ * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
+ *                                     XEN_NETIF_EXTRA_MORE)
+ *  ...
+ *  Fragment N: netif_tx_request_t  - (only if fragment N-1 flags include
+ *                                     NETTXF_more_data - flags on preceding
+ *                                     extras are not relevant here)
+ *                                    flags = 0
+ *                                    size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for receive
+ * (backend -> frontend) packets. Specifically, in a multi-fragment
+ * packet the actual size of fragment 1 can only be determined by
+ * subtracting the sizes of fragments 2..N from the total packet size.
+ *
+ * Ring slot size is 12 octets, however not all request/response
+ * structs use the full size.
+ *
+ * tx request data (netif_tx_request_t)
+ * ------------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref             | offset    | flags     |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | size      |
+ * +-----+-----+-----+-----+
+ *
+ * grant ref: Reference to buffer page.
+ * offset: Offset within buffer page.
+ * flags: NETTXF_*.
+ * id: request identifier, echoed in response.
+ * size: packet size in bytes.
+ *
+ * tx response (netif_tx_response_t)
+ * ---------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | status    | unused                |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | unused                |
+ * +-----+-----+-----+-----+
+ *
+ * id: reflects id in transmit request
+ * status: NETIF_RSP_*
+ *
+ * Guest receive
+ * =============
+ *
+ * This is the 'wire' format for receive (backend -> frontend) packets:
+ *
+ *  Fragment 1: netif_rx_request_t  - flags = NETRXF_*
+ *                                    size = fragment size
+ * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
+ *                                     NETRXF_extra_info)
+ *  ...
+ * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
+ *                                     XEN_NETIF_EXTRA_MORE)
+ *  ...
+ *  Fragment N: netif_rx_request_t  - (only if fragment N-1 flags include
+ *                                     NETRXF_more_data - flags on preceding
+ *                                     extras are not relevant here)
+ *                                    flags = 0
+ *                                    size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for transmit
+ * (frontend -> backend) packets. Specifically, in a multi-fragment
+ * packet the size of the packet can only be determined by summing the
+ * sizes of fragments 1..N.
+ *
+ * Ring slot size is 8 octets.
+ *
+ * rx request (netif_rx_request_t)
+ * -------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | pad       | gref                  |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: request identifier, echoed in response.
+ * gref: reference to incoming granted frame.
+ *
+ * rx response (netif_rx_response_t)
+ * ---------------------------------
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id        | offset    | flags     | status    |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: reflects id in receive request
+ * offset: offset in page of start of received packet
+ * flags: NETRXF_*
+ * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
+ *
+ * NOTE: Historically, to support GSO on the frontend receive side, Linux
+ *       netfront does not make use of the rx response id (because, as
+ *       described below, extra info structures overlay the id field).
+ *       Instead it assumes that responses always appear in the same ring
+ *       slot as their corresponding request. Thus, to maintain
+ *       compatibility, backends must make sure this is the case.
+ *
+ * Extra Info
+ * ==========
+ *
+ * Can be present if initial request or response has NET{T,R}XF_extra_info,
+ * or previous extra request has XEN_NETIF_EXTRA_MORE.
+ *
+ * The struct therefore needs to fit into either a tx or rx slot and
+ * is therefore limited to 8 octets.
+ *
+ * NOTE: Because extra info data overlays the usual request/response
+ *       structures, there is no id information in the opposite direction.
+ *       So, if an extra info overlays an rx response the frontend can
+ *       assume that it is in the same ring slot as the request that was
+ *       consumed to make the slot available, and the backend must ensure
+ *       this assumption is true.
+ *
+ * extra info (netif_extra_info_t)
+ * -------------------------------
+ *
+ * General format:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| type specific data                |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | padding for tx        |
+ * +-----+-----+-----+-----+
+ *
+ * type: XEN_NETIF_EXTRA_TYPE_*
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * padding for tx: present only in the tx case due to 8 octet limit
+ *                 from rx case. Not shown in type specific entries
+ *                 below.
+ *
+ * XEN_NETIF_EXTRA_TYPE_GSO:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| size      |type | pad | features  |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * size: Maximum payload size of each segment. For example,
+ *       for TCP this is just the path MSS.
+ * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
+ *       the packet and any extra features required to segment the
+ *       packet properly.
+ * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO
+ *           features required to process this packet, such as ECN
+ *           support for TCPv4.
+ *
+ * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| addr                              |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * addr: address to add/remove
+ *
+ * XEN_NETIF_EXTRA_TYPE_HASH:
+ *
+ * A backend that supports teoplitz hashing is assumed to accept
+ * this type of extra info in transmit packets.
+ * A frontend that enables hashing is assumed to accept
+ * this type of extra info in receive packets.
+ *
+ *    0     1     2     3     4     5     6     7  octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags|htype| alg |LSB ---- value ---- MSB|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
+ * alg: The algorithm used to calculate the hash (one of
+ *      XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
+ * value: Hash value
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank     (0)
+#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info     (3)
+#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
+
+#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
+struct netif_tx_request {
+    grant_ref_t gref;
+    uint16_t offset;
+    uint16_t flags;
+    uint16_t id;
+    uint16_t size;
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_HASH      (4)  /* u.hash */
+#define XEN_NETIF_EXTRA_TYPE_MAX       (5)
+
+/* netif_extra_info_t flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types */
+#define XEN_NETIF_GSO_TYPE_NONE         (0)
+#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
+#define XEN_NETIF_GSO_TYPE_TCPV6        (2)
+
+/*
+ * This structure needs to fit within both netif_tx_request_t and
+ * netif_rx_response_t for compatibility.
+ */
+struct netif_extra_info {
+    uint8_t type;
+    uint8_t flags;
+    union {
+        struct {
+            uint16_t size;
+            uint8_t type;
+            uint8_t pad;
+            uint16_t features;
+        } gso;
+        struct {
+            uint8_t addr[6];
+        } mcast;
+        struct {
+            uint8_t type;
+            uint8_t algorithm;
+            uint8_t value[4];
+        } hash;
+        uint16_t pad[3];
+    } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+    uint16_t id;
+    int16_t  status;
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+    uint16_t    id;        /* Echoed in response message.        */
+    uint16_t    pad;
+    grant_ref_t gref;
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank     (1)
+#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data      (2)
+#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info     (3)
+#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+
+/* Packet has GSO prefix. Deprecated but included for compatibility */
+#define _NETRXF_gso_prefix     (4)
+#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
+
+struct netif_rx_response {
+    uint16_t id;
+    uint16_t offset;
+    uint16_t flags;
+    int16_t  status;
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED         -2
+#define NETIF_RSP_ERROR           -1
+#define NETIF_RSP_OKAY             0
+/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
+#define NETIF_RSP_NULL             1
+
+#endif
diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h
new file mode 100644
index 0000000000..52b4de0f81
--- /dev/null
+++ b/include/hw/xen/interface/io/protocols.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2008, Keir Fraser
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/hw/xen/io/ring.h b/include/hw/xen/interface/io/ring.h
index 62abfd7a6e..1adacf09f9 100644
--- a/include/hw/xen/io/ring.h
+++ b/include/hw/xen/interface/io/ring.h
@@ -24,8 +24,8 @@
  * Tim Deegan and Andrew Warfield November 2004.
  */
 
-#ifndef XEN_PUBLIC_IO_RING_H
-#define XEN_PUBLIC_IO_RING_H
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
 
 /*
  * When #include'ing this header, you need to provide the following
@@ -469,7 +469,7 @@ struct name##_data_intf {                                                     \
 };                                                                            \
 DEFINE_XEN_FLEX_RING(name)
 
-#endif /* XEN_PUBLIC_IO_RING_H */
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
 
 /*
  * Local variables:
diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h
new file mode 100644
index 0000000000..c6a58639d6
--- /dev/null
+++ b/include/hw/xen/interface/io/usbif.h
@@ -0,0 +1,254 @@
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * num-ports
+ *      Values:         unsigned [1...31]
+ *
+ *      Number of ports for this (virtual) USB host connector.
+ *
+ * usb-ver
+ *      Values:         unsigned [1...2]
+ *
+ *      USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
+ *
+ * port/[1...31]
+ *      Values:         string
+ *
+ *      Physical USB device connected to the given port, e.g. "3-1.5".
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         unsigned
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * urb-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for urb requests.
+ *
+ * conn-ring-ref
+ *      Values:         unsigned
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer. This is the ring
+ *      buffer for connection/disconnection requests.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ */
+
+enum usb_spec_version {
+	USB_VER_UNKNOWN = 0,
+	USB_VER_USB11,
+	USB_VER_USB20,
+	USB_VER_USB30,	/* not supported yet */
+};
+
+/*
+ *  USB pipe in usbif_request
+ *
+ *  - port number:	bits 0-4
+ *				(USB_MAXCHILDREN is 31)
+ *
+ *  - operation flag:	bit 5
+ *				(0 = submit urb,
+ *				 1 = unlink urb)
+ *
+ *  - direction:		bit 7
+ *				(0 = Host-to-Device [Out]
+ *				 1 = Device-to-Host [In])
+ *
+ *  - device address:	bits 8-14
+ *
+ *  - endpoint:		bits 15-18
+ *
+ *  - pipe type:	bits 30-31
+ *				(00 = isochronous, 01 = interrupt,
+ *				 10 = control, 11 = bulk)
+ */
+
+#define USBIF_PIPE_PORT_MASK	0x0000001f
+#define USBIF_PIPE_UNLINK	0x00000020
+#define USBIF_PIPE_DIR		0x00000080
+#define USBIF_PIPE_DEV_MASK	0x0000007f
+#define USBIF_PIPE_DEV_SHIFT	8
+#define USBIF_PIPE_EP_MASK	0x0000000f
+#define USBIF_PIPE_EP_SHIFT	15
+#define USBIF_PIPE_TYPE_MASK	0x00000003
+#define USBIF_PIPE_TYPE_SHIFT	30
+#define USBIF_PIPE_TYPE_ISOC	0
+#define USBIF_PIPE_TYPE_INT	1
+#define USBIF_PIPE_TYPE_CTRL	2
+#define USBIF_PIPE_TYPE_BULK	3
+
+#define usbif_pipeportnum(pipe)			((pipe) & USBIF_PIPE_PORT_MASK)
+#define usbif_setportnum_pipe(pipe, portnum)	((pipe) | (portnum))
+
+#define usbif_pipeunlink(pipe)			((pipe) & USBIF_PIPE_UNLINK)
+#define usbif_pipesubmit(pipe)			(!usbif_pipeunlink(pipe))
+#define usbif_setunlink_pipe(pipe)		((pipe) | USBIF_PIPE_UNLINK)
+
+#define usbif_pipein(pipe)			((pipe) & USBIF_PIPE_DIR)
+#define usbif_pipeout(pipe)			(!usbif_pipein(pipe))
+
+#define usbif_pipedevice(pipe)			\
+		(((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
+
+#define usbif_pipeendpoint(pipe)		\
+		(((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
+
+#define usbif_pipetype(pipe)			\
+		(((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
+#define usbif_pipeisoc(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
+#define usbif_pipeint(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
+#define usbif_pipectrl(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
+#define usbif_pipebulk(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
+
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
+#define USBIF_MAX_PORTNR	31
+#define USBIF_RING_SIZE	4096
+
+/*
+ * RING for transferring urbs.
+ */
+struct usbif_request_segment {
+	grant_ref_t gref;
+	uint16_t offset;
+	uint16_t length;
+};
+
+struct usbif_urb_request {
+	uint16_t id; /* request id */
+	uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
+
+	/* basic urb parameter */
+	uint32_t pipe;
+	uint16_t transfer_flags;
+#define USBIF_SHORT_NOT_OK	0x0001
+	uint16_t buffer_length;
+	union {
+		uint8_t ctrl[8]; /* setup_packet (Ctrl) */
+
+		struct {
+			uint16_t interval; /* maximum (1024*8) in usb core */
+			uint16_t start_frame; /* start frame */
+			uint16_t number_of_packets; /* number of ISO packet */
+			uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+		} isoc;
+
+		struct {
+			uint16_t interval; /* maximum (1024*8) in usb core */
+			uint16_t pad[3];
+		} intr;
+
+		struct {
+			uint16_t unlink_id; /* unlink request id */
+			uint16_t pad[3];
+		} unlink;
+
+	} u;
+
+	/* urb data segments */
+	struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_urb_request usbif_urb_request_t;
+
+struct usbif_urb_response {
+	uint16_t id; /* request id */
+	uint16_t start_frame;  /* start frame (ISO) */
+	int32_t status; /* status (non-ISO) */
+	int32_t actual_length; /* actual transfer length */
+	int32_t error_count; /* number of ISO errors */
+};
+typedef struct usbif_urb_response usbif_urb_response_t;
+
+DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct usbif_conn_request {
+	uint16_t id;
+};
+typedef struct usbif_conn_request usbif_conn_request_t;
+
+struct usbif_conn_response {
+	uint16_t id; /* request id */
+	uint8_t portnum; /* port number */
+	uint8_t speed; /* usb_device_speed */
+#define USBIF_SPEED_NONE	0
+#define USBIF_SPEED_LOW		1
+#define USBIF_SPEED_FULL	2
+#define USBIF_SPEED_HIGH	3
+};
+typedef struct usbif_conn_response usbif_conn_response_t;
+
+DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h
new file mode 100644
index 0000000000..2fbf2a7fdc
--- /dev/null
+++ b/include/hw/xen/interface/io/xenbus.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
+
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed        = 6,
+
+    /*
+     * Reconfiguring: The device is being reconfigured.
+     */
+    XenbusStateReconfiguring = 7,
+
+    XenbusStateReconfigured  = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 3183f10e3c..1c2d9dfdb8 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -15,6 +15,7 @@
 typedef void (*XenWatchHandler)(void *opaque);
 
 typedef struct XenWatch XenWatch;
+typedef struct XenEventChannel XenEventChannel;
 
 typedef struct XenDevice {
     DeviceState qdev;
@@ -28,8 +29,7 @@ typedef struct XenDevice {
     XenWatch *backend_online_watch;
     xengnttab_handle *xgth;
     bool feature_grant_copy;
-    xenevtchn_handle *xeh;
-    NotifierList event_notifiers;
+    QLIST_HEAD(, XenEventChannel) event_channels;
 } XenDevice;
 
 typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp);
@@ -119,11 +119,10 @@ void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
                                 XenDeviceGrantCopySegment segs[],
                                 unsigned int nr_segs, Error **errp);
 
-typedef struct XenEventChannel XenEventChannel;
-
-typedef void (*XenEventHandler)(void *opaque);
+typedef bool (*XenEventHandler)(void *opaque);
 
 XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+                                               AioContext *ctx,
                                                unsigned int port,
                                                XenEventHandler handler,
                                                void *opaque, Error **errp);
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 9a8155e172..0504b43659 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -12,7 +12,7 @@
 
 #include <xenctrl.h>
 #include <xenstore.h>
-#include <xen/io/xenbus.h>
+#include "hw/xen/interface/io/xenbus.h"
 
 #include "hw/hw.h"
 #include "hw/xen/xen.h"
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
index dd021f2df0..0a4970c068 100644
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
@@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
         mxe-$TARGET-w64-mingw32.shared-curl \
         mxe-$TARGET-w64-mingw32.shared-glib \
         mxe-$TARGET-w64-mingw32.shared-libgcrypt \
-        mxe-$TARGET-w64-mingw32.shared-libssh2 \
         mxe-$TARGET-w64-mingw32.shared-libusb1 \
         mxe-$TARGET-w64-mingw32.shared-lzo \
         mxe-$TARGET-w64-mingw32.shared-nettle \
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
index 4542bcc821..b27985b1b1 100644
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
@@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
         mxe-$TARGET-w64-mingw32.shared-curl \
         mxe-$TARGET-w64-mingw32.shared-glib \
         mxe-$TARGET-w64-mingw32.shared-libgcrypt \
-        mxe-$TARGET-w64-mingw32.shared-libssh2 \
         mxe-$TARGET-w64-mingw32.shared-libusb1 \
         mxe-$TARGET-w64-mingw32.shared-lzo \
         mxe-$TARGET-w64-mingw32.shared-nettle \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index 12c460597e..619d1b5656 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -35,7 +35,7 @@ ENV PACKAGES \
     libpng-devel \
     librbd-devel \
     libseccomp-devel \
-    libssh2-devel \
+    libssh-devel \
     libubsan \
     libusbx-devel \
     libxml2-devel \
@@ -50,7 +50,6 @@ ENV PACKAGES \
     mingw32-gtk3 \
     mingw32-libjpeg-turbo \
     mingw32-libpng \
-    mingw32-libssh2 \
     mingw32-libtasn1 \
     mingw32-nettle \
     mingw32-pixman \
@@ -64,7 +63,6 @@ ENV PACKAGES \
     mingw64-gtk3 \
     mingw64-libjpeg-turbo \
     mingw64-libpng \
-    mingw64-libssh2 \
     mingw64-libtasn1 \
     mingw64-nettle \
     mingw64-pixman \
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
index 8d256961f0..d3b72209c8 100644
--- a/tests/docker/dockerfiles/ubuntu.docker
+++ b/tests/docker/dockerfiles/ubuntu.docker
@@ -53,7 +53,7 @@ ENV PACKAGES flex bison \
     libsnappy-dev \
     libspice-protocol-dev \
     libspice-server-dev \
-    libssh2-1-dev \
+    libssh-dev \
     libusb-1.0-0-dev \
     libusbredirhost-dev \
     libvdeplug-dev \
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
index 2e2900150b..9d80b11500 100644
--- a/tests/docker/dockerfiles/ubuntu1804.docker
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -40,7 +40,7 @@ ENV PACKAGES flex bison \
     libsnappy-dev \
     libspice-protocol-dev \
     libspice-server-dev \
-    libssh2-1-dev \
+    libssh-dev \
     libusb-1.0-0-dev \
     libusbredirhost-dev \
     libvdeplug-dev \
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index f51394ae8e..4fab42a28c 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2358,5 +2358,5 @@ Offset          Length          Mapped to       File
 0x140000000     0x10000         0x50000         TEST_DIR/t-s003.vmdk
 
 === Testing afl image with a very large capacity ===
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
 *** done
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
index 141a2eaa7e..5f0fb86211 100755
--- a/tests/qemu-iotests/134
+++ b/tests/qemu-iotests/134
@@ -57,6 +57,15 @@ echo "== reading whole image =="
 $QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
 
 echo
+echo "== rewriting cluster part =="
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== verify pattern =="
+$QEMU_IO --object $SECRET -c "read -P 0 0 512"  --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512"  --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
 echo "== rewriting whole image =="
 $QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
 
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
index 972be49d91..09d46f6b17 100644
--- a/tests/qemu-iotests/134.out
+++ b/tests/qemu-iotests/134.out
@@ -5,6 +5,16 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
 read 134217728/134217728 bytes at offset 0
 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 
+== rewriting cluster part ==
+wrote 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 == rewriting whole image ==
 wrote 134217728/134217728 bytes at offset 0
 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
index 69f2c1d392..b8a86c446e 100755
--- a/tests/qemu-iotests/205
+++ b/tests/qemu-iotests/205
@@ -24,7 +24,7 @@ import iotests
 import time
 from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
 
-nbd_sock = 'nbd_sock'
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
 nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
 disk = os.path.join(iotests.test_dir, 'disk')
 
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
index b3816136f7..ec8c1d06f0 100755
--- a/tests/qemu-iotests/207
+++ b/tests/qemu-iotests/207
@@ -110,12 +110,49 @@ with iotests.FilePath('t.img') as disk_path, \
 
     iotests.img_info_log(remote_path)
 
-    md5_key = subprocess.check_output(
-        'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
-        'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
-        shell=True).rstrip().decode('ascii')
+    keys = subprocess.check_output(
+        'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
+        'cut -d" " -f3',
+        shell=True).rstrip().decode('ascii').split('\n')
+
+    # Mappings of base64 representations to digests
+    md5_keys = {}
+    sha1_keys = {}
+
+    for key in keys:
+        md5_keys[key] = subprocess.check_output(
+            'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
+            shell=True).rstrip().decode('ascii')
+
+        sha1_keys[key] = subprocess.check_output(
+            'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
+            shell=True).rstrip().decode('ascii')
 
     vm.launch()
+
+    # Find correct key first
+    matching_key = None
+    for key in keys:
+        result = vm.qmp('blockdev-add',
+                        driver='ssh', node_name='node0', path=disk_path,
+                        server={
+                             'host': '127.0.0.1',
+                             'port': '22',
+                        }, host_key_check={
+                             'mode': 'hash',
+                             'type': 'md5',
+                             'hash': md5_keys[key],
+                        })
+
+        if 'error' not in result:
+            vm.qmp('blockdev-del', node_name='node0')
+            matching_key = key
+            break
+
+    if matching_key is None:
+        vm.shutdown()
+        iotests.notrun('Did not find a key that fits 127.0.0.1')
+
     blockdev_create(vm, { 'driver': 'ssh',
                           'location': {
                               'path': disk_path,
@@ -140,7 +177,7 @@ with iotests.FilePath('t.img') as disk_path, \
                               'host-key-check': {
                                   'mode': 'hash',
                                   'type': 'md5',
-                                  'hash': md5_key,
+                                  'hash': md5_keys[matching_key],
                               }
                           },
                           'size': 8388608 })
@@ -148,11 +185,6 @@ with iotests.FilePath('t.img') as disk_path, \
 
     iotests.img_info_log(remote_path)
 
-    sha1_key = subprocess.check_output(
-        'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
-        'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
-        shell=True).rstrip().decode('ascii')
-
     vm.launch()
     blockdev_create(vm, { 'driver': 'ssh',
                           'location': {
@@ -178,7 +210,7 @@ with iotests.FilePath('t.img') as disk_path, \
                               'host-key-check': {
                                   'mode': 'hash',
                                   'type': 'sha1',
-                                  'hash': sha1_key,
+                                  'hash': sha1_keys[matching_key],
                               }
                           },
                           'size': 4194304 })
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
index ec9823793a..1239d9d648 100644
--- a/tests/qemu-iotests/207.out
+++ b/tests/qemu-iotests/207.out
@@ -68,7 +68,7 @@ virtual size: 4 MiB (4194304 bytes)
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
 {"return": {}}
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
 {"execute": "job-dismiss", "arguments": {"id": "job0"}}
 {"return": {}}