aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml4
-rw-r--r--block/Makefile.objs6
-rw-r--r--block/ssh.c652
-rw-r--r--block/trace-events14
-rw-r--r--block/vmdk.c372
-rw-r--r--blockdev.c2
-rwxr-xr-xconfigure65
-rw-r--r--docs/qemu-block-drivers.texi2
-rw-r--r--hw/9pfs/xen-9pfs.h4
-rw-r--r--hw/block/dataplane/xen-block.c44
-rw-r--r--hw/block/dataplane/xen-block.h3
-rw-r--r--hw/block/nvme.c1
-rw-r--r--hw/block/xen-block.c38
-rw-r--r--hw/block/xen_blkif.h5
-rw-r--r--hw/char/xen_console.c2
-rw-r--r--hw/display/xenfb.c7
-rw-r--r--hw/i386/xen/xen-hvm.c16
-rw-r--r--hw/i386/xen/xen-mapcache.c2
-rw-r--r--hw/net/xen_nic.c2
-rw-r--r--hw/usb/xen-usb.c3
-rw-r--r--hw/xen/xen-bus.c92
-rw-r--r--hw/xen/xen-legacy-backend.c2
-rw-r--r--include/hw/xen/interface/grant_table.h36
-rw-r--r--include/hw/xen/interface/io/blkif.h712
-rw-r--r--include/hw/xen/interface/io/console.h46
-rw-r--r--include/hw/xen/interface/io/fbif.h156
-rw-r--r--include/hw/xen/interface/io/kbdif.h566
-rw-r--r--include/hw/xen/interface/io/netif.h1010
-rw-r--r--include/hw/xen/interface/io/protocols.h42
-rw-r--r--include/hw/xen/interface/io/ring.h (renamed from include/hw/xen/io/ring.h)6
-rw-r--r--include/hw/xen/interface/io/usbif.h254
-rw-r--r--include/hw/xen/interface/io/xenbus.h70
-rw-r--r--include/hw/xen/xen-bus.h9
-rw-r--r--include/hw/xen/xen_common.h2
-rw-r--r--tests/docker/dockerfiles/debian-win32-cross.docker1
-rw-r--r--tests/docker/dockerfiles/debian-win64-cross.docker1
-rw-r--r--tests/docker/dockerfiles/fedora.docker4
-rw-r--r--tests/docker/dockerfiles/ubuntu.docker2
-rw-r--r--tests/docker/dockerfiles/ubuntu1804.docker2
-rw-r--r--tests/qemu-iotests/059.out2
-rwxr-xr-xtests/qemu-iotests/1349
-rw-r--r--tests/qemu-iotests/134.out10
-rwxr-xr-xtests/qemu-iotests/2052
-rwxr-xr-xtests/qemu-iotests/20754
-rw-r--r--tests/qemu-iotests/207.out2
45 files changed, 3846 insertions, 490 deletions
diff --git a/.travis.yml b/.travis.yml
index aeb9b211cd..279658b116 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ addons:
- libseccomp-dev
- libspice-protocol-dev
- libspice-server-dev
- - libssh2-1-dev
+ - libssh-dev
- liburcu-dev
- libusb-1.0-0-dev
- libvte-2.91-dev
@@ -270,7 +270,7 @@ matrix:
- libseccomp-dev
- libspice-protocol-dev
- libspice-server-dev
- - libssh2-1-dev
+ - libssh-dev
- liburcu-dev
- libusb-1.0-0-dev
- libvte-2.91-dev
diff --git a/block/Makefile.objs b/block/Makefile.objs
index dbd1522722..35f3bca4d9 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_VXHS) += vxhs.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
+block-obj-$(CONFIG_LIBSSH) += ssh.o
block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
block-obj-y += backup.o
@@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
vxhs.o-libs := $(VXHS_LIBS)
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
-ssh.o-libs := $(LIBSSH2_LIBS)
+ssh.o-cflags := $(LIBSSH_CFLAGS)
+ssh.o-libs := $(LIBSSH_LIBS)
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
dmg-bz2.o-libs := $(BZIP2_LIBS)
diff --git a/block/ssh.c b/block/ssh.c
index 6da7b9cbfe..501933b855 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -24,8 +24,8 @@
#include "qemu/osdep.h"
-#include <libssh2.h>
-#include <libssh2_sftp.h>
+#include <libssh/libssh.h>
+#include <libssh/sftp.h>
#include "block/block_int.h"
#include "block/qdict.h"
@@ -46,13 +46,11 @@
#include "trace.h"
/*
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself. The meaning of <bitmask> is described
- * here: http://www.libssh2.org/libssh2_trace.html
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
+ * The meaning of <level> is described here:
+ * http://api.libssh.org/master/group__libssh__log.html
*/
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
typedef struct BDRVSSHState {
/* Coroutine. */
@@ -60,18 +58,15 @@ typedef struct BDRVSSHState {
/* SSH connection. */
int sock; /* socket */
- LIBSSH2_SESSION *session; /* ssh session */
- LIBSSH2_SFTP *sftp; /* sftp session */
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
+ ssh_session session; /* ssh session */
+ sftp_session sftp; /* sftp session */
+ sftp_file sftp_handle; /* sftp remote file handle */
- /* See ssh_seek() function below. */
- int64_t offset;
- bool offset_op_read;
-
- /* File attributes at open. We try to keep the .filesize field
+ /*
+ * File attributes at open. We try to keep the .size field
* updated if it changes (eg by writing at the end of the file).
*/
- LIBSSH2_SFTP_ATTRIBUTES attrs;
+ sftp_attributes attrs;
InetSocketAddress *inet;
@@ -91,7 +86,6 @@ static void ssh_state_init(BDRVSSHState *s)
{
memset(s, 0, sizeof *s);
s->sock = -1;
- s->offset = -1;
qemu_co_mutex_init(&s->lock);
}
@@ -99,20 +93,18 @@ static void ssh_state_free(BDRVSSHState *s)
{
g_free(s->user);
+ if (s->attrs) {
+ sftp_attributes_free(s->attrs);
+ }
if (s->sftp_handle) {
- libssh2_sftp_close(s->sftp_handle);
+ sftp_close(s->sftp_handle);
}
if (s->sftp) {
- libssh2_sftp_shutdown(s->sftp);
+ sftp_free(s->sftp);
}
if (s->session) {
- libssh2_session_disconnect(s->session,
- "from qemu ssh client: "
- "user closed the connection");
- libssh2_session_free(s->session);
- }
- if (s->sock >= 0) {
- close(s->sock);
+ ssh_disconnect(s->session);
+ ssh_free(s->session); /* This frees s->sock */
}
}
@@ -127,13 +119,13 @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
va_end(args);
if (s->session) {
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ error_setg(errp, "%s: %s (libssh error code: %d)",
msg, ssh_err, ssh_err_code);
} else {
error_setg(errp, "%s", msg);
@@ -152,18 +144,18 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
va_end(args);
if (s->sftp) {
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- unsigned long sftp_err_code;
+ int sftp_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- /* See <libssh2_sftp.h>. */
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ /* See <libssh/sftp.h>. */
+ sftp_err_code = sftp_get_error(s->sftp);
error_setg(errp,
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
msg, ssh_err, ssh_err_code, sftp_err_code);
} else {
error_setg(errp, "%s", msg);
@@ -173,15 +165,15 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
static void sftp_error_trace(BDRVSSHState *s, const char *op)
{
- char *ssh_err;
+ const char *ssh_err;
int ssh_err_code;
- unsigned long sftp_err_code;
+ int sftp_err_code;
- /* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_error(s->session,
- &ssh_err, NULL, 0);
- /* See <libssh2_sftp.h>. */
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+ /* This is not an errno. See <libssh/libssh.h>. */
+ ssh_err = ssh_get_error(s->session);
+ ssh_err_code = ssh_get_error_code(s->session);
+ /* See <libssh/sftp.h>. */
+ sftp_err_code = sftp_get_error(s->sftp);
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
}
@@ -282,82 +274,120 @@ static void ssh_parse_filename(const char *filename, QDict *options,
parse_uri(filename, options, errp);
}
-static int check_host_key_knownhosts(BDRVSSHState *s,
- const char *host, int port, Error **errp)
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
{
- const char *home;
- char *knh_file = NULL;
- LIBSSH2_KNOWNHOSTS *knh = NULL;
- struct libssh2_knownhost *found;
- int ret, r;
- const char *hostkey;
- size_t len;
- int type;
-
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
- if (!hostkey) {
+ int ret;
+#ifdef HAVE_LIBSSH_0_8
+ enum ssh_known_hosts_e state;
+ int r;
+ ssh_key pubkey;
+ enum ssh_keytypes_e pubkey_type;
+ unsigned char *server_hash = NULL;
+ size_t server_hash_len;
+ char *fingerprint = NULL;
+
+ state = ssh_session_is_known_server(s->session);
+ trace_ssh_server_status(state);
+
+ switch (state) {
+ case SSH_KNOWN_HOSTS_OK:
+ /* OK */
+ trace_ssh_check_host_key_knownhosts();
+ break;
+ case SSH_KNOWN_HOSTS_CHANGED:
ret = -EINVAL;
- session_error_setg(errp, s, "failed to read remote host key");
+ r = ssh_get_server_publickey(s->session, &pubkey);
+ if (r == 0) {
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
+ &server_hash, &server_hash_len);
+ pubkey_type = ssh_key_type(pubkey);
+ ssh_key_free(pubkey);
+ }
+ if (r == 0) {
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
+ server_hash,
+ server_hash_len);
+ ssh_clean_pubkey_hash(&server_hash);
+ }
+ if (fingerprint) {
+ error_setg(errp,
+ "host key (%s key with fingerprint %s) does not match "
+ "the one in known_hosts; this may be a possible attack",
+ ssh_key_type_to_char(pubkey_type), fingerprint);
+ ssh_string_free_char(fingerprint);
+ } else {
+ error_setg(errp,
+ "host key does not match the one in known_hosts; this "
+ "may be a possible attack");
+ }
goto out;
- }
-
- knh = libssh2_knownhost_init(s->session);
- if (!knh) {
+ case SSH_KNOWN_HOSTS_OTHER:
ret = -EINVAL;
- session_error_setg(errp, s,
- "failed to initialize known hosts support");
+ error_setg(errp,
+ "host key for this server not found, another type exists");
+ goto out;
+ case SSH_KNOWN_HOSTS_UNKNOWN:
+ ret = -EINVAL;
+ error_setg(errp, "no host key was found in known_hosts");
+ goto out;
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
+ ret = -ENOENT;
+ error_setg(errp, "known_hosts file not found");
+ goto out;
+ case SSH_KNOWN_HOSTS_ERROR:
+ ret = -EINVAL;
+ error_setg(errp, "error while checking the host");
+ goto out;
+ default:
+ ret = -EINVAL;
+ error_setg(errp, "error while checking for known server (%d)", state);
goto out;
}
+#else /* !HAVE_LIBSSH_0_8 */
+ int state;
- home = getenv("HOME");
- if (home) {
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
- } else {
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
- }
-
- /* Read all known hosts from OpenSSH-style known_hosts file. */
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
+ state = ssh_is_server_known(s->session);
+ trace_ssh_server_status(state);
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
- &found);
- switch (r) {
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
+ switch (state) {
+ case SSH_SERVER_KNOWN_OK:
/* OK */
- trace_ssh_check_host_key_knownhosts(found->key);
+ trace_ssh_check_host_key_knownhosts();
break;
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
+ case SSH_SERVER_KNOWN_CHANGED:
ret = -EINVAL;
- session_error_setg(errp, s,
- "host key does not match the one in known_hosts"
- " (found key %s)", found->key);
+ error_setg(errp,
+ "host key does not match the one in known_hosts; this "
+ "may be a possible attack");
goto out;
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
+ case SSH_SERVER_FOUND_OTHER:
ret = -EINVAL;
- session_error_setg(errp, s, "no host key was found in known_hosts");
+ error_setg(errp,
+ "host key for this server not found, another type exists");
+ goto out;
+ case SSH_SERVER_FILE_NOT_FOUND:
+ ret = -ENOENT;
+ error_setg(errp, "known_hosts file not found");
goto out;
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
+ case SSH_SERVER_NOT_KNOWN:
ret = -EINVAL;
- session_error_setg(errp, s,
- "failure matching the host key with known_hosts");
+ error_setg(errp, "no host key was found in known_hosts");
+ goto out;
+ case SSH_SERVER_ERROR:
+ ret = -EINVAL;
+ error_setg(errp, "server error");
goto out;
default:
ret = -EINVAL;
- session_error_setg(errp, s, "unknown error matching the host key"
- " with known_hosts (%d)", r);
+ error_setg(errp, "error while checking for known server (%d)", state);
goto out;
}
+#endif /* !HAVE_LIBSSH_0_8 */
/* known_hosts checking successful. */
ret = 0;
out:
- if (knh != NULL) {
- libssh2_knownhost_free(knh);
- }
- g_free(knh_file);
return ret;
}
@@ -401,18 +431,34 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
static int
check_host_key_hash(BDRVSSHState *s, const char *hash,
- int hash_type, size_t fingerprint_len, Error **errp)
+ enum ssh_publickey_hash_type type, Error **errp)
{
- const char *fingerprint;
-
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
- if (!fingerprint) {
+ int r;
+ ssh_key pubkey;
+ unsigned char *server_hash;
+ size_t server_hash_len;
+
+#ifdef HAVE_LIBSSH_0_8
+ r = ssh_get_server_publickey(s->session, &pubkey);
+#else
+ r = ssh_get_publickey(s->session, &pubkey);
+#endif
+ if (r != SSH_OK) {
session_error_setg(errp, s, "failed to read remote host key");
return -EINVAL;
}
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
- hash) != 0) {
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
+ ssh_key_free(pubkey);
+ if (r != 0) {
+ session_error_setg(errp, s,
+ "failed reading the hash of the server SSH key");
+ return -EINVAL;
+ }
+
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
+ ssh_clean_pubkey_hash(&server_hash);
+ if (r != 0) {
error_setg(errp, "remote host key does not match host_key_check '%s'",
hash);
return -EPERM;
@@ -421,8 +467,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
return 0;
}
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
- SshHostKeyCheck *hkc, Error **errp)
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
{
SshHostKeyCheckMode mode;
@@ -438,15 +483,15 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
case SSH_HOST_KEY_CHECK_MODE_HASH:
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
return check_host_key_hash(s, hkc->u.hash.hash,
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
+ SSH_PUBLICKEY_HASH_MD5, errp);
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
return check_host_key_hash(s, hkc->u.hash.hash,
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
+ SSH_PUBLICKEY_HASH_SHA1, errp);
}
g_assert_not_reached();
break;
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
- return check_host_key_knownhosts(s, host, port, errp);
+ return check_host_key_knownhosts(s, errp);
default:
g_assert_not_reached();
}
@@ -454,60 +499,43 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
return -EINVAL;
}
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
+static int authenticate(BDRVSSHState *s, Error **errp)
{
int r, ret;
- const char *userauthlist;
- LIBSSH2_AGENT *agent = NULL;
- struct libssh2_agent_publickey *identity;
- struct libssh2_agent_publickey *prev_identity = NULL;
+ int method;
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
- if (strstr(userauthlist, "publickey") == NULL) {
+ /* Try to authenticate with the "none" method. */
+ r = ssh_userauth_none(s->session, NULL);
+ if (r == SSH_AUTH_ERROR) {
ret = -EPERM;
- error_setg(errp,
- "remote server does not support \"publickey\" authentication");
+ session_error_setg(errp, s, "failed to authenticate using none "
+ "authentication");
goto out;
- }
-
- /* Connect to ssh-agent and try each identity in turn. */
- agent = libssh2_agent_init(s->session);
- if (!agent) {
- ret = -EINVAL;
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
- goto out;
- }
- if (libssh2_agent_connect(agent)) {
- ret = -ECONNREFUSED;
- session_error_setg(errp, s, "failed to connect to ssh-agent");
- goto out;
- }
- if (libssh2_agent_list_identities(agent)) {
- ret = -EINVAL;
- session_error_setg(errp, s,
- "failed requesting identities from ssh-agent");
+ } else if (r == SSH_AUTH_SUCCESS) {
+ /* Authenticated! */
+ ret = 0;
goto out;
}
- for(;;) {
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
- if (r == 1) { /* end of list */
- break;
- }
- if (r < 0) {
+ method = ssh_userauth_list(s->session, NULL);
+ trace_ssh_auth_methods(method);
+
+ /*
+ * Try to authenticate with publickey, using the ssh-agent
+ * if available.
+ */
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
+ if (r == SSH_AUTH_ERROR) {
ret = -EINVAL;
- session_error_setg(errp, s,
- "failed to obtain identity from ssh-agent");
+ session_error_setg(errp, s, "failed to authenticate using "
+ "publickey authentication");
goto out;
- }
- r = libssh2_agent_userauth(agent, user, identity);
- if (r == 0) {
+ } else if (r == SSH_AUTH_SUCCESS) {
/* Authenticated! */
ret = 0;
goto out;
}
- /* Failed to authenticate with this identity, try the next one. */
- prev_identity = identity;
}
ret = -EPERM;
@@ -515,13 +543,6 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
"and the identities held by your ssh-agent");
out:
- if (agent != NULL) {
- /* Note: libssh2 implementation implicitly calls
- * libssh2_agent_disconnect if necessary.
- */
- libssh2_agent_free(agent);
- }
-
return ret;
}
@@ -640,7 +661,8 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
- long port = 0;
+ unsigned int port = 0;
+ int new_sock = -1;
if (opts->has_user) {
s->user = g_strdup(opts->user);
@@ -657,71 +679,147 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
s->inet = opts->server;
opts->server = NULL;
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
error_setg(errp, "Use only numeric port value");
ret = -EINVAL;
goto err;
}
/* Open the socket and connect. */
- s->sock = inet_connect_saddr(s->inet, errp);
- if (s->sock < 0) {
+ new_sock = inet_connect_saddr(s->inet, errp);
+ if (new_sock < 0) {
ret = -EIO;
goto err;
}
+ /*
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
+ * but do not fail if it cannot be disabled.
+ */
+ r = socket_set_nodelay(new_sock);
+ if (r < 0) {
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
+ s->inet->host, strerror(errno));
+ }
+
/* Create SSH session. */
- s->session = libssh2_session_init();
+ s->session = ssh_new();
if (!s->session) {
ret = -EINVAL;
- session_error_setg(errp, s, "failed to initialize libssh2 session");
+ session_error_setg(errp, s, "failed to initialize libssh session");
goto err;
}
-#if TRACE_LIBSSH2 != 0
- libssh2_trace(s->session, TRACE_LIBSSH2);
-#endif
+ /*
+ * Make sure we are in blocking mode during the connection and
+ * authentication phases.
+ */
+ ssh_set_blocking(s->session, 1);
- r = libssh2_session_handshake(s->session, s->sock);
- if (r != 0) {
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the user in the libssh session");
+ goto err;
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the host in the libssh session");
+ goto err;
+ }
+
+ if (port > 0) {
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the port in the libssh session");
+ goto err;
+ }
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to disable the compression in the libssh "
+ "session");
+ goto err;
+ }
+
+ /* Read ~/.ssh/config. */
+ r = ssh_options_parse_config(s->session, NULL);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
+ goto err;
+ }
+
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
+ if (r < 0) {
+ ret = -EINVAL;
+ session_error_setg(errp, s,
+ "failed to set the socket in the libssh session");
+ goto err;
+ }
+ /* libssh took ownership of the socket. */
+ s->sock = new_sock;
+ new_sock = -1;
+
+ /* Connect. */
+ r = ssh_connect(s->session);
+ if (r != SSH_OK) {
ret = -EINVAL;
session_error_setg(errp, s, "failed to establish SSH session");
goto err;
}
/* Check the remote host's key against known_hosts. */
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
+ ret = check_host_key(s, opts->host_key_check, errp);
if (ret < 0) {
goto err;
}
/* Authenticate. */
- ret = authenticate(s, s->user, errp);
+ ret = authenticate(s, errp);
if (ret < 0) {
goto err;
}
/* Start SFTP. */
- s->sftp = libssh2_sftp_init(s->session);
+ s->sftp = sftp_new(s->session);
if (!s->sftp) {
- session_error_setg(errp, s, "failed to initialize sftp handle");
+ session_error_setg(errp, s, "failed to create sftp handle");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ r = sftp_init(s->sftp);
+ if (r < 0) {
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
ret = -EINVAL;
goto err;
}
/* Open the remote file. */
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
- creat_mode);
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
if (!s->sftp_handle) {
- session_error_setg(errp, s, "failed to open remote file '%s'",
- opts->path);
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
+ opts->path);
ret = -EINVAL;
goto err;
}
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
- if (r < 0) {
+ /* Make sure the SFTP file is handled in blocking mode. */
+ sftp_file_set_blocking(s->sftp_handle);
+
+ s->attrs = sftp_fstat(s->sftp_handle);
+ if (!s->attrs) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
@@ -729,21 +827,27 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
return 0;
err:
+ if (s->attrs) {
+ sftp_attributes_free(s->attrs);
+ }
+ s->attrs = NULL;
if (s->sftp_handle) {
- libssh2_sftp_close(s->sftp_handle);
+ sftp_close(s->sftp_handle);
}
s->sftp_handle = NULL;
if (s->sftp) {
- libssh2_sftp_shutdown(s->sftp);
+ sftp_free(s->sftp);
}
s->sftp = NULL;
if (s->session) {
- libssh2_session_disconnect(s->session,
- "from qemu ssh client: "
- "error opening connection");
- libssh2_session_free(s->session);
+ ssh_disconnect(s->session);
+ ssh_free(s->session);
}
s->session = NULL;
+ s->sock = -1;
+ if (new_sock >= 0) {
+ close(new_sock);
+ }
return ret;
}
@@ -758,9 +862,11 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
ssh_state_init(s);
- ssh_flags = LIBSSH2_FXF_READ;
+ ssh_flags = 0;
if (bdrv_flags & BDRV_O_RDWR) {
- ssh_flags |= LIBSSH2_FXF_WRITE;
+ ssh_flags |= O_RDWR;
+ } else {
+ ssh_flags |= O_RDONLY;
}
opts = ssh_parse_options(options, errp);
@@ -775,18 +881,13 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
}
/* Go non-blocking. */
- libssh2_session_set_blocking(s->session, 0);
+ ssh_set_blocking(s->session, 0);
qapi_free_BlockdevOptionsSsh(opts);
return 0;
err:
- if (s->sock >= 0) {
- close(s->sock);
- }
- s->sock = -1;
-
qapi_free_BlockdevOptionsSsh(opts);
return ret;
@@ -797,25 +898,25 @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
{
ssize_t ret;
char c[1] = { '\0' };
- int was_blocking = libssh2_session_get_blocking(s->session);
+ int was_blocking = ssh_is_blocking(s->session);
/* offset must be strictly greater than the current size so we do
* not overwrite anything */
- assert(offset > 0 && offset > s->attrs.filesize);
+ assert(offset > 0 && offset > s->attrs->size);
- libssh2_session_set_blocking(s->session, 1);
+ ssh_set_blocking(s->session, 1);
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
+ sftp_seek64(s->sftp_handle, offset - 1);
+ ret = sftp_write(s->sftp_handle, c, 1);
- libssh2_session_set_blocking(s->session, was_blocking);
+ ssh_set_blocking(s->session, was_blocking);
if (ret < 0) {
sftp_error_setg(errp, s, "Failed to grow file");
return -EIO;
}
- s->attrs.filesize = offset;
+ s->attrs->size = offset;
return 0;
}
@@ -843,8 +944,7 @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
ssh_state_init(&s);
ret = connect_to_ssh(&s, opts->location,
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
+ O_RDWR | O_CREAT | O_TRUNC,
0644, errp);
if (ret < 0) {
goto fail;
@@ -913,10 +1013,8 @@ static int ssh_has_zero_init(BlockDriverState *bs)
/* Assume false, unless we can positively prove it's true. */
int has_zero_init = 0;
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
- has_zero_init = 1;
- }
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
+ has_zero_init = 1;
}
return has_zero_init;
@@ -953,12 +1051,12 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
.co = qemu_coroutine_self()
};
- r = libssh2_session_block_directions(s->session);
+ r = ssh_get_poll_flags(s->session);
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
+ if (r & SSH_READ_PENDING) {
rd_handler = restart_coroutine;
}
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
+ if (r & SSH_WRITE_PENDING) {
wr_handler = restart_coroutine;
}
@@ -970,33 +1068,6 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
trace_ssh_co_yield_back(s->sock);
}
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
- * in the remote file. Notice that it just updates a field in the
- * sftp_handle structure, so there is no network traffic and it cannot
- * fail.
- *
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
- * performance since it causes the handle to throw away all in-flight
- * reads and buffered readahead data. Therefore this function tries
- * to be intelligent about when to call the underlying libssh2 function.
- */
-#define SSH_SEEK_WRITE 0
-#define SSH_SEEK_READ 1
-#define SSH_SEEK_FORCE 2
-
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
-{
- bool op_read = (flags & SSH_SEEK_READ) != 0;
- bool force = (flags & SSH_SEEK_FORCE) != 0;
-
- if (force || op_read != s->offset_op_read || offset != s->offset) {
- trace_ssh_seek(offset);
- libssh2_sftp_seek64(s->sftp_handle, offset);
- s->offset = offset;
- s->offset_op_read = op_read;
- }
-}
-
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
@@ -1008,7 +1079,8 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
trace_ssh_read(offset, size);
- ssh_seek(s, offset, SSH_SEEK_READ);
+ trace_ssh_seek(offset);
+ sftp_seek64(s->sftp_handle, offset);
/* This keeps track of the current iovec element ('i'), where we
* will write to next ('buf'), and the end of the current iovec
@@ -1018,35 +1090,35 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
buf = i->iov_base;
end_of_vec = i->iov_base + i->iov_len;
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
- * although it will also do readahead behind our backs. Therefore
- * we may have to do repeated reads here until we have read 'size'
- * bytes.
- */
for (got = 0; got < size; ) {
+ size_t request_read_size;
again:
- trace_ssh_read_buf(buf, end_of_vec - buf);
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
- trace_ssh_read_return(r);
+ /*
+ * The size of SFTP packets is limited to 32K bytes, so limit
+ * the amount of data requested to 16K, as libssh currently
+ * does not handle multiple requests on its own.
+ */
+ request_read_size = MIN(end_of_vec - buf, 16384);
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
- if (r < 0) {
- sftp_error_trace(s, "read");
- s->offset = -1;
- return -EIO;
- }
- if (r == 0) {
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
/* EOF: Short read so pad the buffer with zeroes and return it. */
qemu_iovec_memset(qiov, got, 0, size - got);
return 0;
}
+ if (r <= 0) {
+ sftp_error_trace(s, "read");
+ return -EIO;
+ }
got += r;
buf += r;
- s->offset += r;
if (buf >= end_of_vec && got < size) {
i++;
buf = i->iov_base;
@@ -1083,7 +1155,8 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
trace_ssh_write(offset, size);
- ssh_seek(s, offset, SSH_SEEK_WRITE);
+ trace_ssh_seek(offset);
+ sftp_seek64(s->sftp_handle, offset);
/* This keeps track of the current iovec element ('i'), where we
* will read from next ('buf'), and the end of the current iovec
@@ -1094,46 +1167,37 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
end_of_vec = i->iov_base + i->iov_len;
for (written = 0; written < size; ) {
+ size_t request_write_size;
again:
- trace_ssh_write_buf(buf, end_of_vec - buf);
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
- trace_ssh_write_return(r);
+ /*
+ * Avoid too large data packets, as libssh currently does not
+ * handle multiple requests on its own.
+ */
+ request_write_size = MIN(end_of_vec - buf, 131072);
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
if (r < 0) {
sftp_error_trace(s, "write");
- s->offset = -1;
return -EIO;
}
- /* The libssh2 API is very unclear about this. A comment in
- * the code says "nothing was acked, and no EAGAIN was
- * received!" which apparently means that no data got sent
- * out, and the underlying channel didn't return any EAGAIN
- * indication. I think this is a bug in either libssh2 or
- * OpenSSH (server-side). In any case, forcing a seek (to
- * discard libssh2 internal buffers), and then trying again
- * works for me.
- */
- if (r == 0) {
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
- co_yield(s, bs);
- goto again;
- }
written += r;
buf += r;
- s->offset += r;
if (buf >= end_of_vec && written < size) {
i++;
buf = i->iov_base;
end_of_vec = i->iov_base + i->iov_len;
}
- if (offset + written > s->attrs.filesize)
- s->attrs.filesize = offset + written;
+ if (offset + written > s->attrs->size) {
+ s->attrs->size = offset + written;
+ }
}
return 0;
@@ -1168,24 +1232,24 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
}
}
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
+#ifdef HAVE_LIBSSH_0_8
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
trace_ssh_flush();
+
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
+ return 0;
+ }
again:
- r = libssh2_sftp_fsync(s->sftp_handle);
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
+ r = sftp_fsync(s->sftp_handle);
+ if (r == SSH_AGAIN) {
co_yield(s, bs);
goto again;
}
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
- return 0;
- }
if (r < 0) {
sftp_error_trace(s, "fsync");
return -EIO;
@@ -1206,25 +1270,25 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
return ret;
}
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
+#else /* !HAVE_LIBSSH_0_8 */
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
{
BDRVSSHState *s = bs->opaque;
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
return 0;
}
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
+#endif /* !HAVE_LIBSSH_0_8 */
static int64_t ssh_getlength(BlockDriverState *bs)
{
BDRVSSHState *s = bs->opaque;
int64_t length;
- /* Note we cannot make a libssh2 call here. */
- length = (int64_t) s->attrs.filesize;
+ /* Note we cannot make a libssh call here. */
+ length = (int64_t) s->attrs->size;
trace_ssh_getlength(length);
return length;
@@ -1241,12 +1305,12 @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
return -ENOTSUP;
}
- if (offset < s->attrs.filesize) {
+ if (offset < s->attrs->size) {
error_setg(errp, "ssh driver does not support shrinking files");
return -ENOTSUP;
}
- if (offset == s->attrs.filesize) {
+ if (offset == s->attrs->size) {
return 0;
}
@@ -1341,12 +1405,16 @@ static void bdrv_ssh_init(void)
{
int r;
- r = libssh2_init(0);
+ r = ssh_init();
if (r != 0) {
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
exit(EXIT_FAILURE);
}
+#if TRACE_LIBSSH != 0
+ ssh_set_log_level(TRACE_LIBSSH);
+#endif
+
bdrv_register(&bdrv_ssh);
}
diff --git a/block/trace-events b/block/trace-events
index 9ccea755da..d724df0117 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -171,19 +171,21 @@ nbd_client_connect_success(const char *export_name) "export '%s'"
# ssh.c
ssh_restart_coroutine(void *co) "co=%p"
ssh_flush(void) "fsync"
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
+ssh_check_host_key_knownhosts(void) "host key OK"
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
ssh_co_yield_back(int sock) "s->sock=%d - back"
ssh_getlength(int64_t length) "length=%" PRIi64
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
+ssh_auth_methods(int methods) "auth methods=0x%x"
+ssh_server_status(int status) "server status=%d"
# curl.c
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
@@ -216,4 +218,4 @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
# ssh.c
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
diff --git a/block/vmdk.c b/block/vmdk.c
index 51067c774f..bd36ece125 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -91,6 +91,44 @@ typedef struct {
uint16_t compressAlgorithm;
} QEMU_PACKED VMDK4Header;
+typedef struct VMDKSESparseConstHeader {
+ uint64_t magic;
+ uint64_t version;
+ uint64_t capacity;
+ uint64_t grain_size;
+ uint64_t grain_table_size;
+ uint64_t flags;
+ uint64_t reserved1;
+ uint64_t reserved2;
+ uint64_t reserved3;
+ uint64_t reserved4;
+ uint64_t volatile_header_offset;
+ uint64_t volatile_header_size;
+ uint64_t journal_header_offset;
+ uint64_t journal_header_size;
+ uint64_t journal_offset;
+ uint64_t journal_size;
+ uint64_t grain_dir_offset;
+ uint64_t grain_dir_size;
+ uint64_t grain_tables_offset;
+ uint64_t grain_tables_size;
+ uint64_t free_bitmap_offset;
+ uint64_t free_bitmap_size;
+ uint64_t backmap_offset;
+ uint64_t backmap_size;
+ uint64_t grains_offset;
+ uint64_t grains_size;
+ uint8_t pad[304];
+} QEMU_PACKED VMDKSESparseConstHeader;
+
+typedef struct VMDKSESparseVolatileHeader {
+ uint64_t magic;
+ uint64_t free_gt_number;
+ uint64_t next_txn_seq_number;
+ uint64_t replay_journal;
+ uint8_t pad[480];
+} QEMU_PACKED VMDKSESparseVolatileHeader;
+
#define L2_CACHE_SIZE 16
typedef struct VmdkExtent {
@@ -99,19 +137,23 @@ typedef struct VmdkExtent {
bool compressed;
bool has_marker;
bool has_zero_grain;
+ bool sesparse;
+ uint64_t sesparse_l2_tables_offset;
+ uint64_t sesparse_clusters_offset;
+ int32_t entry_size;
int version;
int64_t sectors;
int64_t end_sector;
int64_t flat_start_offset;
int64_t l1_table_offset;
int64_t l1_backup_table_offset;
- uint32_t *l1_table;
+ void *l1_table;
uint32_t *l1_backup_table;
unsigned int l1_size;
uint32_t l1_entry_sectors;
unsigned int l2_size;
- uint32_t *l2_cache;
+ void *l2_cache;
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
uint32_t l2_cache_counts[L2_CACHE_SIZE];
@@ -425,11 +467,22 @@ static int vmdk_add_extent(BlockDriverState *bs,
error_setg(errp, "Invalid granularity, image may be corrupt");
return -EFBIG;
}
- if (l1_size > 512 * 1024 * 1024) {
- /* Although with big capacity and small l1_entry_sectors, we can get a
+ if (l1_size > 32 * 1024 * 1024) {
+ /*
+ * Although with big capacity and small l1_entry_sectors, we can get a
* big l1_size, we don't want unbounded value to allocate the table.
- * Limit it to 512M, which is 16PB for default cluster and L2 table
- * size */
+ * Limit it to 32M, which is enough to store:
+ * 8TB - for both VMDK3 & VMDK4 with
+ * minimal cluster size: 512B
+ * minimal L2 table size: 512 entries
+ * 8 TB is still more than the maximal value supported for
+ * VMDK3 & VMDK4 which is 2TB.
+ * 64TB - for "ESXi seSparse Extent"
+ * minimal cluster size: 512B (default is 4KB)
+ * L2 table size: 4096 entries (const).
+ * 64TB is more than the maximal value supported for
+ * seSparse VMDKs (which is slightly less than 64TB)
+ */
error_setg(errp, "L1 size too big");
return -EFBIG;
}
@@ -454,6 +507,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
extent->l2_size = l2_size;
extent->cluster_sectors = flat ? sectors : cluster_sectors;
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
+ extent->entry_size = sizeof(uint32_t);
if (s->num_extents > 1) {
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
@@ -475,7 +529,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
int i;
/* read the L1 table */
- l1_size = extent->l1_size * sizeof(uint32_t);
+ l1_size = extent->l1_size * extent->entry_size;
extent->l1_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_table == NULL) {
return -ENOMEM;
@@ -493,10 +547,16 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
goto fail_l1;
}
for (i = 0; i < extent->l1_size; i++) {
- le32_to_cpus(&extent->l1_table[i]);
+ if (extent->entry_size == sizeof(uint64_t)) {
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
+ } else {
+ assert(extent->entry_size == sizeof(uint32_t));
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
+ }
}
if (extent->l1_backup_table_offset) {
+ assert(!extent->sesparse);
extent->l1_backup_table = g_try_malloc(l1_size);
if (l1_size && extent->l1_backup_table == NULL) {
ret = -ENOMEM;
@@ -519,7 +579,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
extent->l2_cache =
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
return 0;
fail_l1b:
g_free(extent->l1_backup_table);
@@ -565,6 +625,205 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
return ret;
}
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
+
+/* Strict checks - format not officially documented */
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
+ Error **errp)
+{
+ header->magic = le64_to_cpu(header->magic);
+ header->version = le64_to_cpu(header->version);
+ header->grain_size = le64_to_cpu(header->grain_size);
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
+ header->flags = le64_to_cpu(header->flags);
+ header->reserved1 = le64_to_cpu(header->reserved1);
+ header->reserved2 = le64_to_cpu(header->reserved2);
+ header->reserved3 = le64_to_cpu(header->reserved3);
+ header->reserved4 = le64_to_cpu(header->reserved4);
+
+ header->volatile_header_offset =
+ le64_to_cpu(header->volatile_header_offset);
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
+
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
+
+ header->journal_offset = le64_to_cpu(header->journal_offset);
+ header->journal_size = le64_to_cpu(header->journal_size);
+
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
+
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
+
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
+
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
+ header->backmap_size = le64_to_cpu(header->backmap_size);
+
+ header->grains_offset = le64_to_cpu(header->grains_offset);
+ header->grains_size = le64_to_cpu(header->grains_size);
+
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
+ header->magic);
+ return -EINVAL;
+ }
+
+ if (header->version != 0x0000000200000001) {
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
+ header->version);
+ return -ENOTSUP;
+ }
+
+ if (header->grain_size != 8) {
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
+ header->grain_size);
+ return -ENOTSUP;
+ }
+
+ if (header->grain_table_size != 64) {
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
+ header->grain_table_size);
+ return -ENOTSUP;
+ }
+
+ if (header->flags != 0) {
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
+ header->flags);
+ return -ENOTSUP;
+ }
+
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
+ header->reserved3 != 0 || header->reserved4 != 0) {
+ error_setg(errp, "Unsupported reserved bits:"
+ " 0x%016" PRIx64 " 0x%016" PRIx64
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
+ header->reserved1, header->reserved2,
+ header->reserved3, header->reserved4);
+ return -ENOTSUP;
+ }
+
+ /* check that padding is 0 */
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+ error_setg(errp, "Unsupported non-zero const header padding");
+ return -ENOTSUP;
+ }
+
+ return 0;
+}
+
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
+ Error **errp)
+{
+ header->magic = le64_to_cpu(header->magic);
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
+ header->replay_journal = le64_to_cpu(header->replay_journal);
+
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
+ header->magic);
+ return -EINVAL;
+ }
+
+ if (header->replay_journal) {
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
+ return -ENOTSUP;
+ }
+
+ /* check that padding is 0 */
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
+ error_setg(errp, "Unsupported non-zero volatile header padding");
+ return -ENOTSUP;
+ }
+
+ return 0;
+}
+
+static int vmdk_open_se_sparse(BlockDriverState *bs,
+ BdrvChild *file,
+ int flags, Error **errp)
+{
+ int ret;
+ VMDKSESparseConstHeader const_header;
+ VMDKSESparseVolatileHeader volatile_header;
+ VmdkExtent *extent;
+
+ ret = bdrv_apply_auto_read_only(bs,
+ "No write support for seSparse images available", errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(sizeof(const_header) == SECTOR_SIZE);
+
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
+ if (ret < 0) {
+ bdrv_refresh_filename(file->bs);
+ error_setg_errno(errp, -ret,
+ "Could not read const header from file '%s'",
+ file->bs->filename);
+ return ret;
+ }
+
+ /* check const header */
+ ret = check_se_sparse_const_header(&const_header, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
+
+ ret = bdrv_pread(file,
+ const_header.volatile_header_offset * SECTOR_SIZE,
+ &volatile_header, sizeof(volatile_header));
+ if (ret < 0) {
+ bdrv_refresh_filename(file->bs);
+ error_setg_errno(errp, -ret,
+ "Could not read volatile header from file '%s'",
+ file->bs->filename);
+ return ret;
+ }
+
+ /* check volatile header */
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = vmdk_add_extent(bs, file, false,
+ const_header.capacity,
+ const_header.grain_dir_offset * SECTOR_SIZE,
+ 0,
+ const_header.grain_dir_size *
+ SECTOR_SIZE / sizeof(uint64_t),
+ const_header.grain_table_size *
+ SECTOR_SIZE / sizeof(uint64_t),
+ const_header.grain_size,
+ &extent,
+ errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ extent->sesparse = true;
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
+ extent->sesparse_clusters_offset = const_header.grains_offset;
+ extent->entry_size = sizeof(uint64_t);
+
+ ret = vmdk_init_tables(bs, extent, errp);
+ if (ret) {
+ /* free extent allocated by vmdk_add_extent */
+ vmdk_free_last_extent(bs);
+ }
+
+ return ret;
+}
+
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
QDict *options, Error **errp);
@@ -842,6 +1101,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
* RW [size in sectors] SPARSE "file-name.vmdk"
* RW [size in sectors] VMFS "file-name.vmdk"
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
*/
flat_offset = -1;
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
@@ -864,7 +1124,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
if (sectors <= 0 ||
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
+ strcmp(type, "SESPARSE")) ||
(strcmp(access, "RW"))) {
continue;
}
@@ -917,6 +1178,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
return ret;
}
extent = &s->extents[s->num_extents - 1];
+ } else if (!strcmp(type, "SESPARSE")) {
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
+ if (ret) {
+ bdrv_unref_child(bs, extent_file);
+ return ret;
+ }
+ extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
bdrv_unref_child(bs, extent_file);
@@ -951,6 +1219,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
if (strcmp(ct, "monolithicFlat") &&
strcmp(ct, "vmfs") &&
strcmp(ct, "vmfsSparse") &&
+ strcmp(ct, "seSparse") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
strcmp(ct, "twoGbMaxExtentFlat")) {
error_setg(errp, "Unsupported image type '%s'", ct);
@@ -1201,10 +1470,12 @@ static int get_cluster_offset(BlockDriverState *bs,
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
- uint32_t min_count, *l2_table;
+ uint32_t min_count;
+ void *l2_table;
bool zeroed = false;
int64_t ret;
int64_t cluster_sector;
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
if (m_data) {
m_data->valid = 0;
@@ -1219,7 +1490,36 @@ static int get_cluster_offset(BlockDriverState *bs,
if (l1_index >= extent->l1_size) {
return VMDK_ERROR;
}
- l2_offset = extent->l1_table[l1_index];
+ if (extent->sesparse) {
+ uint64_t l2_offset_u64;
+
+ assert(extent->entry_size == sizeof(uint64_t));
+
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
+ if (l2_offset_u64 == 0) {
+ l2_offset = 0;
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
+ /*
+ * Top most nibble is 0x1 if grain table is allocated.
+ * strict check - top most 4 bytes must be 0x10000000 since max
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
+ * grain directories which is smaller than uint32,
+ * where 16MB is the only supported default grain table coverage.
+ */
+ return VMDK_ERROR;
+ } else {
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
+ if (l2_offset_u64 > 0x00000000ffffffff) {
+ return VMDK_ERROR;
+ }
+ l2_offset = (unsigned int)(l2_offset_u64);
+ }
+ } else {
+ assert(extent->entry_size == sizeof(uint32_t));
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
+ }
if (!l2_offset) {
return VMDK_UNALLOC;
}
@@ -1231,7 +1531,7 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[j] >>= 1;
}
}
- l2_table = extent->l2_cache + (i * extent->l2_size);
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
goto found;
}
}
@@ -1244,13 +1544,13 @@ static int get_cluster_offset(BlockDriverState *bs,
min_index = i;
}
}
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
if (bdrv_pread(extent->file,
(int64_t)l2_offset * 512,
l2_table,
- extent->l2_size * sizeof(uint32_t)
- ) != extent->l2_size * sizeof(uint32_t)) {
+ l2_size_bytes
+ ) != l2_size_bytes) {
return VMDK_ERROR;
}
@@ -1258,16 +1558,45 @@ static int get_cluster_offset(BlockDriverState *bs,
extent->l2_cache_counts[min_index] = 1;
found:
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
- zeroed = true;
+ if (extent->sesparse) {
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
+ switch (cluster_sector & 0xf000000000000000) {
+ case 0x0000000000000000:
+ /* unallocated grain */
+ if (cluster_sector != 0) {
+ return VMDK_ERROR;
+ }
+ break;
+ case 0x1000000000000000:
+ /* scsi-unmapped grain - fallthrough */
+ case 0x2000000000000000:
+ /* zero grain */
+ zeroed = true;
+ break;
+ case 0x3000000000000000:
+ /* allocated grain */
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
+ cluster_sector = extent->sesparse_clusters_offset +
+ cluster_sector * extent->cluster_sectors;
+ break;
+ default:
+ return VMDK_ERROR;
+ }
+ } else {
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
+
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
+ zeroed = true;
+ }
}
if (!cluster_sector || zeroed) {
if (!allocate) {
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
}
+ assert(!extent->sesparse);
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
return VMDK_ERROR;
@@ -1291,7 +1620,7 @@ static int get_cluster_offset(BlockDriverState *bs,
m_data->l1_index = l1_index;
m_data->l2_index = l2_index;
m_data->l2_offset = l2_offset;
- m_data->l2_cache_entry = &l2_table[l2_index];
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
}
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
@@ -1617,6 +1946,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
if (!extent) {
return -EIO;
}
+ if (extent->sesparse) {
+ return -ENOTSUP;
+ }
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
- offset_in_cluster);
diff --git a/blockdev.c b/blockdev.c
index 5d6a13dea9..4d141e9a1f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1774,7 +1774,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
backup = common->action->u.drive_backup.data;
- bs = qmp_get_root_bs(backup->device, errp);
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
if (!bs) {
return;
}
diff --git a/configure b/configure
index b091b82cb3..5c7914570e 100755
--- a/configure
+++ b/configure
@@ -472,7 +472,7 @@ auth_pam=""
vte=""
virglrenderer=""
tpm=""
-libssh2=""
+libssh=""
live_block_migration="yes"
numa=""
tcmalloc="no"
@@ -1439,9 +1439,9 @@ for opt do
;;
--enable-tpm) tpm="yes"
;;
- --disable-libssh2) libssh2="no"
+ --disable-libssh) libssh="no"
;;
- --enable-libssh2) libssh2="yes"
+ --enable-libssh) libssh="yes"
;;
--disable-live-block-migration) live_block_migration="no"
;;
@@ -1810,7 +1810,7 @@ disabled with --disable-FEATURE, default is enabled if available:
coroutine-pool coroutine freelist (better performance)
glusterfs GlusterFS backend
tpm TPM support
- libssh2 ssh block device support
+ libssh ssh block device support
numa libnuma support
libxml2 for Parallels image format
tcmalloc tcmalloc support
@@ -3914,43 +3914,34 @@ EOF
fi
##########################################
-# libssh2 probe
-min_libssh2_version=1.2.8
-if test "$libssh2" != "no" ; then
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
- libssh2_cflags=$($pkg_config libssh2 --cflags)
- libssh2_libs=$($pkg_config libssh2 --libs)
- libssh2=yes
+# libssh probe
+if test "$libssh" != "no" ; then
+ if $pkg_config --exists libssh; then
+ libssh_cflags=$($pkg_config libssh --cflags)
+ libssh_libs=$($pkg_config libssh --libs)
+ libssh=yes
else
- if test "$libssh2" = "yes" ; then
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
+ if test "$libssh" = "yes" ; then
+ error_exit "libssh required for --enable-libssh"
fi
- libssh2=no
+ libssh=no
fi
fi
##########################################
-# libssh2_sftp_fsync probe
+# Check for libssh 0.8
+# This is done like this instead of using the LIBSSH_VERSION_* and
+# SSH_VERSION_* macros because some distributions in the past shipped
+# snapshots of the future 0.8 from Git, and those snapshots did not
+# have updated version numbers (still referring to 0.7.0).
-if test "$libssh2" = "yes"; then
+if test "$libssh" = "yes"; then
cat > $TMPC <<EOF
-#include <stdio.h>
-#include <libssh2.h>
-#include <libssh2_sftp.h>
-int main(void) {
- LIBSSH2_SESSION *session;
- LIBSSH2_SFTP *sftp;
- LIBSSH2_SFTP_HANDLE *sftp_handle;
- session = libssh2_session_init ();
- sftp = libssh2_sftp_init (session);
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
- libssh2_sftp_fsync (sftp_handle);
- return 0;
-}
+#include <libssh/libssh.h>
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
EOF
- # libssh2_cflags/libssh2_libs defined in previous test.
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
fi
fi
@@ -6451,7 +6442,7 @@ echo "GlusterFS support $glusterfs"
echo "gcov $gcov_tool"
echo "gcov enabled $gcov"
echo "TPM support $tpm"
-echo "libssh2 support $libssh2"
+echo "libssh support $libssh"
echo "QOM debugging $qom_cast_debug"
echo "Live block migration $live_block_migration"
echo "lzo support $lzo"
@@ -7144,10 +7135,10 @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
fi
-if test "$libssh2" = "yes" ; then
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
+if test "$libssh" = "yes" ; then
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
fi
if test "$live_block_migration" = "yes" ; then
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
index da06a9bc83..91ab0eceae 100644
--- a/docs/qemu-block-drivers.texi
+++ b/docs/qemu-block-drivers.texi
@@ -782,7 +782,7 @@ print a warning when @code{fsync} is not supported:
warning: ssh server @code{ssh.example.com:22} does not support fsync
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
supported.
@node disk_images_nvme
diff --git a/hw/9pfs/xen-9pfs.h b/hw/9pfs/xen-9pfs.h
index fbdee3d843..241e2216a4 100644
--- a/hw/9pfs/xen-9pfs.h
+++ b/hw/9pfs/xen-9pfs.h
@@ -13,8 +13,8 @@
#ifndef HW_9PFS_XEN_9PFS_H
#define HW_9PFS_XEN_9PFS_H
-#include <xen/io/protocols.h>
-#include "hw/xen/io/ring.h"
+#include "hw/xen/interface/io/protocols.h"
+#include "hw/xen/interface/io/ring.h"
/*
* Do not merge into xen-9p-backend.c: clang doesn't allow unused static
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index f7ad452bbd..0f200c5fb0 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -58,6 +58,7 @@ struct XenBlockDataPlane {
int requests_inflight;
unsigned int max_requests;
BlockBackend *blk;
+ unsigned int sector_size;
QEMUBH *bh;
IOThread *iothread;
AioContext *ctx;
@@ -167,7 +168,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
goto err;
}
- request->start = request->req.sector_number * XEN_BLKIF_SECTOR_SIZE;
+ request->start = request->req.sector_number * dataplane->sector_size;
for (i = 0; i < request->req.nr_segments; i++) {
if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
error_report("error: nr_segments too big");
@@ -177,14 +178,14 @@ static int xen_block_parse_request(XenBlockRequest *request)
error_report("error: first > last sector");
goto err;
}
- if (request->req.seg[i].last_sect * XEN_BLKIF_SECTOR_SIZE >=
+ if (request->req.seg[i].last_sect * dataplane->sector_size >=
XC_PAGE_SIZE) {
error_report("error: page crossing");
goto err;
}
len = (request->req.seg[i].last_sect -
- request->req.seg[i].first_sect + 1) * XEN_BLKIF_SECTOR_SIZE;
+ request->req.seg[i].first_sect + 1) * dataplane->sector_size;
request->size += len;
}
if (request->start + request->size > blk_getlength(dataplane->blk)) {
@@ -218,17 +219,17 @@ static int xen_block_copy_request(XenBlockRequest *request)
if (to_domain) {
segs[i].dest.foreign.ref = request->req.seg[i].gref;
segs[i].dest.foreign.offset = request->req.seg[i].first_sect *
- XEN_BLKIF_SECTOR_SIZE;
+ dataplane->sector_size;
segs[i].source.virt = virt;
} else {
segs[i].source.foreign.ref = request->req.seg[i].gref;
segs[i].source.foreign.offset = request->req.seg[i].first_sect *
- XEN_BLKIF_SECTOR_SIZE;
+ dataplane->sector_size;
segs[i].dest.virt = virt;
}
segs[i].len = (request->req.seg[i].last_sect -
request->req.seg[i].first_sect + 1) *
- XEN_BLKIF_SECTOR_SIZE;
+ dataplane->sector_size;
virt += segs[i].len;
}
@@ -317,7 +318,9 @@ static void xen_block_complete_aio(void *opaque, int ret)
}
xen_block_release_request(request);
- qemu_bh_schedule(dataplane->bh);
+ if (dataplane->more_work) {
+ qemu_bh_schedule(dataplane->bh);
+ }
done:
aio_context_release(dataplane->ctx);
@@ -336,12 +339,12 @@ static bool xen_block_split_discard(XenBlockRequest *request,
/* Wrap around, or overflowing byte limit? */
if (sec_start + sec_count < sec_count ||
- sec_start + sec_count > INT64_MAX / XEN_BLKIF_SECTOR_SIZE) {
+ sec_start + sec_count > INT64_MAX / dataplane->sector_size) {
return false;
}
- byte_offset = sec_start * XEN_BLKIF_SECTOR_SIZE;
- byte_remaining = sec_count * XEN_BLKIF_SECTOR_SIZE;
+ byte_offset = sec_start * dataplane->sector_size;
+ byte_remaining = sec_count * dataplane->sector_size;
do {
byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ?
@@ -514,12 +517,13 @@ static int xen_block_get_request(XenBlockDataPlane *dataplane,
*/
#define IO_PLUG_THRESHOLD 1
-static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
+static bool xen_block_handle_requests(XenBlockDataPlane *dataplane)
{
RING_IDX rc, rp;
XenBlockRequest *request;
int inflight_atstart = dataplane->requests_inflight;
int batched = 0;
+ bool done_something = false;
dataplane->more_work = 0;
@@ -551,6 +555,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
}
xen_block_get_request(dataplane, request, rc);
dataplane->rings.common.req_cons = ++rc;
+ done_something = true;
/* parse them */
if (xen_block_parse_request(request) != 0) {
@@ -602,10 +607,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane)
blk_io_unplug(dataplane->blk);
}
- if (dataplane->more_work &&
- dataplane->requests_inflight < dataplane->max_requests) {
- qemu_bh_schedule(dataplane->bh);
- }
+ return done_something;
}
static void xen_block_dataplane_bh(void *opaque)
@@ -617,21 +619,23 @@ static void xen_block_dataplane_bh(void *opaque)
aio_context_release(dataplane->ctx);
}
-static void xen_block_dataplane_event(void *opaque)
+static bool xen_block_dataplane_event(void *opaque)
{
XenBlockDataPlane *dataplane = opaque;
- qemu_bh_schedule(dataplane->bh);
+ return xen_block_handle_requests(dataplane);
}
XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
- BlockConf *conf,
+ BlockBackend *blk,
+ unsigned int sector_size,
IOThread *iothread)
{
XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1);
dataplane->xendev = xendev;
- dataplane->blk = conf->blk;
+ dataplane->blk = blk;
+ dataplane->sector_size = sector_size;
QLIST_INIT(&dataplane->inflight);
QLIST_INIT(&dataplane->freelist);
@@ -803,7 +807,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
}
dataplane->event_channel =
- xen_device_bind_event_channel(xendev, event_channel,
+ xen_device_bind_event_channel(xendev, dataplane->ctx, event_channel,
xen_block_dataplane_event, dataplane,
&local_err);
if (local_err) {
diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h
index d6fa6d26dd..76dcd51c3d 100644
--- a/hw/block/dataplane/xen-block.h
+++ b/hw/block/dataplane/xen-block.h
@@ -15,7 +15,8 @@
typedef struct XenBlockDataPlane XenBlockDataPlane;
XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev,
- BlockConf *conf,
+ BlockBackend *blk,
+ unsigned int sector_size,
IOThread *iothread);
void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane);
void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 107a719b95..36d6a8bb3a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1384,7 +1384,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
n->bar.cap = 0;
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
NVME_CAP_SET_CQR(n->bar.cap, 1);
- NVME_CAP_SET_AMS(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
NVME_CAP_SET_CSS(n->bar.cap, 1);
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 4de537aef4..8f224ef81d 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -52,11 +52,25 @@ static void xen_block_connect(XenDevice *xendev, Error **errp)
XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev);
const char *type = object_get_typename(OBJECT(blockdev));
XenBlockVdev *vdev = &blockdev->props.vdev;
+ BlockConf *conf = &blockdev->props.conf;
+ unsigned int feature_large_sector_size;
unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol;
char *str;
trace_xen_block_connect(type, vdev->disk, vdev->partition);
+ if (xen_device_frontend_scanf(xendev, "feature-large-sector-size", "%u",
+ &feature_large_sector_size) != 1) {
+ feature_large_sector_size = 0;
+ }
+
+ if (feature_large_sector_size != 1 &&
+ conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
+ error_setg(errp, "logical_block_size != %u not supported by frontend",
+ XEN_BLKIF_SECTOR_SIZE);
+ return;
+ }
+
if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u",
&order) != 1) {
nr_ring_ref = 1;
@@ -150,7 +164,7 @@ static void xen_block_set_size(XenBlockDevice *blockdev)
const char *type = object_get_typename(OBJECT(blockdev));
XenBlockVdev *vdev = &blockdev->props.vdev;
BlockConf *conf = &blockdev->props.conf;
- int64_t sectors = blk_getlength(conf->blk) / XEN_BLKIF_SECTOR_SIZE;
+ int64_t sectors = blk_getlength(conf->blk) / conf->logical_block_size;
XenDevice *xendev = XEN_DEVICE(blockdev);
trace_xen_block_size(type, vdev->disk, vdev->partition, sectors);
@@ -185,6 +199,7 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
const char *type = object_get_typename(OBJECT(blockdev));
XenBlockVdev *vdev = &blockdev->props.vdev;
BlockConf *conf = &blockdev->props.conf;
+ BlockBackend *blk = conf->blk;
Error *local_err = NULL;
if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) {
@@ -206,8 +221,8 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
* The blkif protocol does not deal with removable media, so it must
* always be present, even for CDRom devices.
*/
- assert(conf->blk);
- if (!blk_is_inserted(conf->blk)) {
+ assert(blk);
+ if (!blk_is_inserted(blk)) {
error_setg(errp, "device needs media, but drive is empty");
return;
}
@@ -224,26 +239,20 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
blkconf_blocksizes(conf);
- if (conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) {
- error_setg(errp, "logical_block_size != %u not supported",
- XEN_BLKIF_SECTOR_SIZE);
- return;
- }
-
if (conf->logical_block_size > conf->physical_block_size) {
error_setg(
errp, "logical_block_size > physical_block_size not supported");
return;
}
- blk_set_dev_ops(conf->blk, &xen_block_dev_ops, blockdev);
- blk_set_guest_block_size(conf->blk, conf->logical_block_size);
+ blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
+ blk_set_guest_block_size(blk, conf->logical_block_size);
if (conf->discard_granularity == -1) {
conf->discard_granularity = conf->physical_block_size;
}
- if (blk_get_flags(conf->blk) & BDRV_O_UNMAP) {
+ if (blk_get_flags(blk) & BDRV_O_UNMAP) {
xen_device_backend_printf(xendev, "feature-discard", "%u", 1);
xen_device_backend_printf(xendev, "discard-granularity", "%u",
conf->discard_granularity);
@@ -260,12 +269,13 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
blockdev->device_type);
xen_device_backend_printf(xendev, "sector-size", "%u",
- XEN_BLKIF_SECTOR_SIZE);
+ conf->logical_block_size);
xen_block_set_size(blockdev);
blockdev->dataplane =
- xen_block_dataplane_create(xendev, conf, blockdev->props.iothread);
+ xen_block_dataplane_create(xendev, blk, conf->logical_block_size,
+ blockdev->props.iothread);
}
static void xen_block_frontend_changed(XenDevice *xendev,
diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h
index a353693ea0..99733529c1 100644
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -1,9 +1,8 @@
#ifndef XEN_BLKIF_H
#define XEN_BLKIF_H
-#include "hw/xen/io/ring.h"
-#include <xen/io/blkif.h>
-#include <xen/io/protocols.h>
+#include "hw/xen/interface/io/blkif.h"
+#include "hw/xen/interface/io/protocols.h"
/*
* Not a real protocol. Used to generate ring structs which contain
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 91f34ef06c..47e1092263 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -28,7 +28,7 @@
#include "chardev/char-fe.h"
#include "hw/xen/xen-legacy-backend.h"
-#include <xen/io/console.h>
+#include "hw/xen/interface/io/console.h"
struct buffer {
uint8_t *data;
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 6202f1150e..3c79913b31 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -32,10 +32,9 @@
#include "ui/console.h"
#include "hw/xen/xen-legacy-backend.h"
-#include <xen/event_channel.h>
-#include <xen/io/fbif.h>
-#include <xen/io/kbdif.h>
-#include <xen/io/protocols.h>
+#include "hw/xen/interface/io/fbif.h"
+#include "hw/xen/interface/io/kbdif.h"
+#include "hw/xen/interface/io/protocols.h"
#include "trace.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 2939122e7c..469f1260a4 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -27,7 +27,6 @@
#include "exec/address-spaces.h"
#include <xen/hvm/ioreq.h>
-#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>
//#define DEBUG_XEN_HVM
@@ -120,6 +119,8 @@ typedef struct XenIOState {
DeviceListener device_listener;
hwaddr free_phys_offset;
const XenPhysmap *log_for_dirtybit;
+ /* Buffer used by xen_sync_dirty_bitmap */
+ unsigned long *dirty_bitmap;
Notifier exit;
Notifier suspend;
@@ -465,6 +466,8 @@ static int xen_remove_from_physmap(XenIOState *state,
QLIST_REMOVE(physmap, list);
if (state->log_for_dirtybit == physmap) {
state->log_for_dirtybit = NULL;
+ g_free(state->dirty_bitmap);
+ state->dirty_bitmap = NULL;
}
g_free(physmap);
@@ -615,7 +618,7 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
{
hwaddr npages = size >> TARGET_PAGE_BITS;
const int width = sizeof(unsigned long) * 8;
- unsigned long bitmap[DIV_ROUND_UP(npages, width)];
+ size_t bitmap_size = DIV_ROUND_UP(npages, width);
int rc, i, j;
const XenPhysmap *physmap = NULL;
@@ -627,13 +630,14 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
if (state->log_for_dirtybit == NULL) {
state->log_for_dirtybit = physmap;
+ state->dirty_bitmap = g_new(unsigned long, bitmap_size);
} else if (state->log_for_dirtybit != physmap) {
/* Only one range for dirty bitmap can be tracked. */
return;
}
rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS,
- npages, bitmap);
+ npages, state->dirty_bitmap);
if (rc < 0) {
#ifndef ENODATA
#define ENODATA ENOENT
@@ -647,8 +651,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state,
return;
}
- for (i = 0; i < ARRAY_SIZE(bitmap); i++) {
- unsigned long map = bitmap[i];
+ for (i = 0; i < bitmap_size; i++) {
+ unsigned long map = state->dirty_bitmap[i];
while (map != 0) {
j = ctzl(map);
map &= ~(1ul << j);
@@ -678,6 +682,8 @@ static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
state->log_for_dirtybit = NULL;
+ g_free(state->dirty_bitmap);
+ state->dirty_bitmap = NULL;
/* Disable dirty bit tracking */
xen_track_dirty_vram(xen_domid, 0, 0, NULL);
}
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 254759f776..dc73c86c61 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -17,8 +17,6 @@
#include "hw/xen/xen-legacy-backend.h"
#include "qemu/bitmap.h"
-#include <xen/hvm/params.h>
-
#include "sysemu/xen-mapcache.h"
#include "trace.h"
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 37cda8e4be..ffb3b5898d 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -30,7 +30,7 @@
#include "net/util.h"
#include "hw/xen/xen-legacy-backend.h"
-#include <xen/io/netif.h>
+#include "hw/xen/interface/io/netif.h"
/* ------------------------------------------------------------- */
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index b20d0cfadf..dfbb418e77 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -32,8 +32,7 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
-#include "hw/xen/io/ring.h"
-#include <xen/io/usbif.h>
+#include "hw/xen/interface/io/usbif.h"
/*
* Check for required support of usbif.h: USBIF_SHORT_NOT_OK was the last
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index a4416d0bcf..7503eea9e9 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -924,23 +924,35 @@ done:
}
struct XenEventChannel {
+ QLIST_ENTRY(XenEventChannel) list;
+ AioContext *ctx;
+ xenevtchn_handle *xeh;
evtchn_port_t local_port;
XenEventHandler handler;
void *opaque;
- Notifier notifier;
};
-static void event_notify(Notifier *n, void *data)
+static bool xen_device_poll(void *opaque)
+{
+ XenEventChannel *channel = opaque;
+
+ return channel->handler(channel->opaque);
+}
+
+static void xen_device_event(void *opaque)
{
- XenEventChannel *channel = container_of(n, XenEventChannel, notifier);
- unsigned long port = (unsigned long)data;
+ XenEventChannel *channel = opaque;
+ unsigned long port = xenevtchn_pending(channel->xeh);
if (port == channel->local_port) {
- channel->handler(channel->opaque);
+ xen_device_poll(channel);
+
+ xenevtchn_unmask(channel->xeh, port);
}
}
XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+ AioContext *ctx,
unsigned int port,
XenEventHandler handler,
void *opaque, Error **errp)
@@ -948,24 +960,40 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
XenEventChannel *channel = g_new0(XenEventChannel, 1);
xenevtchn_port_or_error_t local_port;
- local_port = xenevtchn_bind_interdomain(xendev->xeh,
+ channel->xeh = xenevtchn_open(NULL, 0);
+ if (!channel->xeh) {
+ error_setg_errno(errp, errno, "failed xenevtchn_open");
+ goto fail;
+ }
+
+ local_port = xenevtchn_bind_interdomain(channel->xeh,
xendev->frontend_id,
port);
if (local_port < 0) {
error_setg_errno(errp, errno, "xenevtchn_bind_interdomain failed");
-
- g_free(channel);
- return NULL;
+ goto fail;
}
channel->local_port = local_port;
channel->handler = handler;
channel->opaque = opaque;
- channel->notifier.notify = event_notify;
- notifier_list_add(&xendev->event_notifiers, &channel->notifier);
+ channel->ctx = ctx;
+ aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ xen_device_event, NULL, xen_device_poll, channel);
+
+ QLIST_INSERT_HEAD(&xendev->event_channels, channel, list);
return channel;
+
+fail:
+ if (channel->xeh) {
+ xenevtchn_close(channel->xeh);
+ }
+
+ g_free(channel);
+
+ return NULL;
}
void xen_device_notify_event_channel(XenDevice *xendev,
@@ -977,7 +1005,7 @@ void xen_device_notify_event_channel(XenDevice *xendev,
return;
}
- if (xenevtchn_notify(xendev->xeh, channel->local_port) < 0) {
+ if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_notify failed");
}
}
@@ -991,12 +1019,16 @@ void xen_device_unbind_event_channel(XenDevice *xendev,
return;
}
- notifier_remove(&channel->notifier);
+ QLIST_REMOVE(channel, list);
+
+ aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+ NULL, NULL, NULL, NULL);
- if (xenevtchn_unbind(xendev->xeh, channel->local_port) < 0) {
+ if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) {
error_setg_errno(errp, errno, "xenevtchn_unbind failed");
}
+ xenevtchn_close(channel->xeh);
g_free(channel);
}
@@ -1005,6 +1037,7 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp)
XenDevice *xendev = XEN_DEVICE(dev);
XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
const char *type = object_get_typename(OBJECT(xendev));
+ XenEventChannel *channel, *next;
if (!xendev->name) {
return;
@@ -1021,15 +1054,14 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp)
xendev_class->unrealize(xendev, errp);
}
+ /* Make sure all event channels are cleaned up */
+ QLIST_FOREACH_SAFE(channel, &xendev->event_channels, list, next) {
+ xen_device_unbind_event_channel(xendev, channel, NULL);
+ }
+
xen_device_frontend_destroy(xendev);
xen_device_backend_destroy(xendev);
- if (xendev->xeh) {
- qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), NULL, NULL, NULL);
- xenevtchn_close(xendev->xeh);
- xendev->xeh = NULL;
- }
-
if (xendev->xgth) {
xengnttab_close(xendev->xgth);
xendev->xgth = NULL;
@@ -1046,16 +1078,6 @@ static void xen_device_exit(Notifier *n, void *data)
xen_device_unrealize(DEVICE(xendev), &error_abort);
}
-static void xen_device_event(void *opaque)
-{
- XenDevice *xendev = opaque;
- unsigned long port = xenevtchn_pending(xendev->xeh);
-
- notifier_list_notify(&xendev->event_notifiers, (void *)port);
-
- xenevtchn_unmask(xendev->xeh, port);
-}
-
static void xen_device_realize(DeviceState *dev, Error **errp)
{
XenDevice *xendev = XEN_DEVICE(dev);
@@ -1096,16 +1118,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
xendev->feature_grant_copy =
(xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
- xendev->xeh = xenevtchn_open(NULL, 0);
- if (!xendev->xeh) {
- error_setg_errno(errp, errno, "failed xenevtchn_open");
- goto unrealize;
- }
-
- notifier_list_init(&xendev->event_notifiers);
- qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), xen_device_event, NULL,
- xendev);
-
xen_device_backend_create(xendev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 36fd1e9b09..3715c94fa6 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -34,8 +34,6 @@
#include "hw/xen/xen_pvdev.h"
#include "monitor/qdev.h"
-#include <xen/grant_table.h>
-
DeviceState *xen_sysdev;
BusState *xen_sysbus;
diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h
new file mode 100644
index 0000000000..2af0cbdde3
--- /dev/null
+++ b/include/hw/xen/interface/grant_table.h
@@ -0,0 +1,36 @@
+/******************************************************************************
+ * grant_table.h
+ *
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h
new file mode 100644
index 0000000000..8b1be50ce8
--- /dev/null
+++ b/include/hw/xen/interface/io/blkif.h
@@ -0,0 +1,712 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ * Values: "r" (read only), "w" (writable)
+ *
+ * The read or write access permissions to the backing store to be
+ * granted to the frontend.
+ *
+ * params
+ * Values: string
+ *
+ * A free formatted string providing sufficient information for the
+ * hotplug script to attach the device and provide a suitable
+ * handler (ie: a block device) for blkback to use.
+ *
+ * physical-device
+ * Values: "MAJOR:MINOR"
+ * Notes: 11
+ *
+ * MAJOR and MINOR are the major number and minor number of the
+ * backing device respectively.
+ *
+ * physical-device-path
+ * Values: path string
+ *
+ * A string that contains the absolute path to the disk image. On
+ * NetBSD and Linux this is always a block device, while on FreeBSD
+ * it can be either a block device or a regular file.
+ *
+ * type
+ * Values: "file", "phy", "tap"
+ *
+ * The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * The underlying storage is not affected by the direct IO memory
+ * lifetime bug. See:
+ * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ * Therefore this option gives the backend permission to use
+ * O_DIRECT, notwithstanding that bug.
+ *
+ * That is, if this option is enabled, use of O_DIRECT is safe,
+ * in circumstances where we would normally have avoided it as a
+ * workaround for that bug. This option is not relevant for all
+ * backends, and even not necessarily supported for those for
+ * which it is relevant. A backend which knows that it is not
+ * affected by the bug can ignore this option.
+ *
+ * This option doesn't require a backend to use O_DIRECT, so it
+ * should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the backend can process requests
+ * containing the BLKIF_OP_DISCARD request opcode. Requests
+ * of this type may still be returned at any time with the
+ * BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7
+ *
+ * A value of "1" indicates that the backend can keep the grants used
+ * by the frontend driver mapped, so the same set of grants should be
+ * used in all transactions. The maximum number of grants the backend
+ * can map persistently depends on the implementation, but ideally it
+ * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ * feature the backend doesn't need to unmap each grant, preventing
+ * costly TLB flushes. The backend driver should only map grants
+ * persistently if the frontend supports it. If a backend driver chooses
+ * to use the persistent protocol when the frontend doesn't support it,
+ * it will probably hit the maximum number of persistently mapped grants
+ * (due to the fact that the frontend won't be reusing the same grants),
+ * and fall back to non-persistent mode. Backend implementations may
+ * shrink or expand the number of persistently mapped grants without
+ * notifying the frontend depending on memory constraints (this might
+ * cause a performance degradation).
+ *
+ * If a backend driver wants to limit the maximum number of persistently
+ * mapped grants to a value less than RING_SIZE *
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
+ * discard the grants that are less commonly used. Using a LRU in the
+ * backend driver paired with a LIFO queue in the frontend will
+ * allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 1, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages,
+ * etc.).
+ *
+ * max-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The maximum supported size of the request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ * Values: 0/1 (boolean)
+ * Default Value: 1
+ *
+ * This optional property, set by the toolstack, instructs the backend
+ * to offer (or not to offer) discard to the frontend. If the property
+ * is missing the backend should offer discard if the backing storage
+ * actually supports it.
+ *
+ * discard-alignment
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Notes: 4, 5
+ *
+ * The offset, in bytes from the beginning of the virtual block device,
+ * to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ * Values: <uint32_t>
+ * Default Value: <"sector-size">
+ * Notes: 4
+ *
+ * The size, in bytes, of the individually addressable discard extents
+ * of the underlying device.
+ *
+ * discard-secure
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 10
+ *
+ * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ * requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ * Values: <uint32_t> (bitmap)
+ *
+ * A collection of bit flags describing attributes of the backing
+ * device. The VDISK_* macros define the meaning of each bit
+ * location.
+ *
+ * sector-size
+ * Values: <uint32_t>
+ *
+ * The logical block size, in bytes, of the underlying storage. This
+ * must be a power of two with a minimum value of 512.
+ *
+ * NOTE: Because of implementation bugs in some frontends this must be
+ * set to 512, unless the frontend advertizes a non-zero value
+ * in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ * Values: <uint32_t>
+ * Default Value: <"sector-size">
+ *
+ * The physical block size, in bytes, of the backend storage. This
+ * must be an integer multiple of "sector-size".
+ *
+ * sectors
+ * Values: <uint64_t>
+ *
+ * The size of the backend device, expressed in units of "sector-size".
+ * The product of "sector-size" and "sectors" must also be an integer
+ * multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: <uint32_t>
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * ring-ref
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ * Values: <uint32_t>
+ * Notes: 6
+ *
+ * For a frontend providing a multi-page ring, a "number of ring pages"
+ * sized list of nodes, each containing a Xen grant reference granting
+ * permission for the backend to map the page of the ring located
+ * at page index "%u". Page indexes are zero based.
+ *
+ * protocol
+ * Values: string (XEN_IO_PROTO_ABI_*)
+ * Default Value: XEN_IO_PROTO_ABI_NATIVE
+ *
+ * The machine ABI rules governing the format of all ring request and
+ * response structures.
+ *
+ * ring-page-order
+ * Values: <uint32_t>
+ * Default Value: 0
+ * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ * Notes: 1, 3
+ *
+ * The size of the frontend allocated request ring buffer in units
+ * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages,
+ * etc.).
+ *
+ * num-ring-pages
+ * Values: <uint32_t>
+ * Default Value: 1
+ * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ * Notes: DEPRECATED, 2, 3
+ *
+ * The size of the frontend allocated request ring buffer in units of
+ * machine pages. The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ * Notes: 7, 8, 9
+ *
+ * A value of "1" indicates that the frontend will reuse the same grants
+ * for all transactions, allowing the backend to map them with write
+ * access (even when it should be read-only). If the frontend hits the
+ * maximum number of allowed persistently mapped grants, it can fallback
+ * to non persistent mode. This will cause a performance degradation,
+ * since the the backend driver will still try to map those grants
+ * persistently. Since the persistent grants protocol is compatible with
+ * the previous protocol, a frontend driver can choose to work in
+ * persistent mode even when the backend doesn't support it.
+ *
+ * It is recommended that the frontend driver stores the persistently
+ * mapped grants in a LIFO queue, so a subset of all persistently mapped
+ * grants gets used commonly. This is done in case the backend driver
+ * decides to limit the maximum number of persistently mapped grants
+ * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ * Values: 0/1 (boolean)
+ * Default Value: 0
+ *
+ * A value of "1" indicates that the frontend will correctly supply and
+ * interpret all sector-based quantities in terms of the "sector-size"
+ * value supplied in the backend info, whatever that may be set to.
+ * If this node is not present or its value is "0" then it is assumed
+ * that the frontend requires that the logical block size is 512 as it
+ * is hardcoded (which is the case in some frontend implementations).
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ * Values: "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ * Values: <uint32_t>
+ *
+ * A value indicating the physical device to virtualize within the
+ * frontend's domain. (e.g. "The first ATA disk", "The third SCSI
+ * disk", etc.)
+ *
+ * See docs/misc/vbd-interface.txt for details on the format of this
+ * value.
+ *
+ * Notes
+ * -----
+ * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ * PV drivers.
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ * including a distribution deployed on certain nodes of the Amazon
+ * EC2 cluster.
+ * (3) Support for multi-page ring buffers was implemented independently,
+ * in slightly different forms, by both Citrix and RedHat/Amazon.
+ * For full interoperability, block front and backends should publish
+ * identical ring parameters, adjusted for unit differences, to the
+ * XenStore nodes used in both schemes.
+ * (4) Devices that support discard functionality may internally allocate space
+ * (discardable extents) in units that are larger than the exported logical
+ * block size. If the backing device has such discardable extents the
+ * backend should provide both discard-granularity and discard-alignment.
+ * Providing just one of the two may be considered an error by the frontend.
+ * Backends supporting discard should include discard-granularity and
+ * discard-alignment even if it supports discarding individual sectors.
+ * Frontends should assume discard-alignment == 0 and discard-granularity
+ * == sector size if these keys are missing.
+ * (5) The discard-alignment parameter allows a physical device to be
+ * partitioned into virtual devices that do not necessarily begin or
+ * end on a discardable extent boundary.
+ * (6) When there is only a single page allocated to the request ring,
+ * 'ring-ref' is used to communicate the grant reference for this
+ * page to the backend. When using a multi-page ring, the 'ring-ref'
+ * node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ * where the grant is currently mapped. The overhead of doing this copy
+ * however doesn't suppress the speed improvement of not having to unmap
+ * the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ * with write access, even when they should be mapped read-only, since
+ * further requests may reuse these grants and require write permissions.
+ * (9) Linux implementation doesn't have a limit on the maximum number of
+ * grants that can be persistently mapped in the frontend driver, but
+ * due to the frontent driver implementation it should never be bigger
+ * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ * backing device supports secure discard.
+ *(11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * feature multi-page ring buffer.
+ * For example, a frontend requests two queues/rings and the size of each ring
+ * buffer is two pages must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ * Startup *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front Back
+ * ================================= =====================================
+ * XenbusStateInitialising XenbusStateInitialising
+ * o Query virtual device o Query backend device identification
+ * properties. data.
+ * o Setup OS device instance. o Open and validate backend device.
+ * o Publish backend features and
+ * transport parameters.
+ * |
+ * |
+ * V
+ * XenbusStateInitWait
+ *
+ * o Query backend features and
+ * transport parameters.
+ * o Allocate and initialize the
+ * request ring.
+ * o Publish transport parameters
+ * that will be in effect during
+ * this connection.
+ * |
+ * |
+ * V
+ * XenbusStateInitialised
+ *
+ * o Query frontend transport parameters.
+ * o Connect to the request ring and
+ * event channel.
+ * o Publish backend device properties.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * o Query backend device properties.
+ * o Finalize OS virtual device
+ * instance.
+ * |
+ * |
+ * V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ * of transport parameters, can skip certain states in the state machine:
+ *
+ * o A frontend may transition to XenbusStateInitialised without
+ * waiting for the backend to enter XenbusStateInitWait. In this
+ * case, default transport parameters are in effect and any
+ * transport parameters published by the frontend must contain
+ * their default values.
+ *
+ * o A backend may transition to XenbusStateInitialised, bypassing
+ * XenbusStateInitWait, without waiting for the frontend to first
+ * enter the XenbusStateInitialised state. In this case, default
+ * transport parameters are in effect and any transport parameters
+ * published by the backend must contain their default values.
+ *
+ * Drivers that support optional features and/or transport parameter
+ * negotiation must tolerate these additional state transition paths.
+ * In general this means performing the work of any skipped state
+ * transition, if it has not already been performed, in addition to the
+ * work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request. All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional. See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER 2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional. See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE 3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1 4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client. If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unamp (SCSI),
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ * Interface%20manuals/100293068c.pdf
+ *
+ * Optional. See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD 5
+
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT 6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities. See the description
+ * of the "feature-large-sector-size" frontend xenbus node above for
+ * more information.
+ */
+struct blkif_request_segment {
+ grant_ref_t gref; /* reference to I/O buffer frame */
+ /* @first_sect: first sector in frame to transfer (inclusive). */
+ /* @last_sect: last sector in frame to transfer (inclusive). */
+ uint8_t first_sect, last_sect;
+};
+
+/*
+ * Starting ring element for any I/O request.
+ */
+struct blkif_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
+
+/*
+ * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
+ * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
+ */
+struct blkif_request_discard {
+ uint8_t operation; /* BLKIF_OP_DISCARD */
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
+ blkif_vdev_t handle; /* same as for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk */
+ uint64_t nr_sectors; /* number of contiguous sectors to discard*/
+};
+typedef struct blkif_request_discard blkif_request_discard_t;
+
+struct blkif_request_indirect {
+ uint8_t operation; /* BLKIF_OP_INDIRECT */
+ uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */
+ uint16_t nr_segments; /* number of segments */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ blkif_vdev_t handle; /* same as for read/write requests */
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+ uint64_t pad; /* Make it 64 byte aligned on i386 */
+#endif
+};
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+
+struct blkif_response {
+ uint64_t id; /* copied from request */
+ uint8_t operation; /* copied from request */
+ int16_t status; /* BLKIF_RSP_??? */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (only happens on barrier writes). */
+#define BLKIF_RSP_EOPNOTSUPP -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY 0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM 0x1
+#define VDISK_REMOVABLE 0x2
+#define VDISK_READONLY 0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h
new file mode 100644
index 0000000000..e2155d1cf5
--- /dev/null
+++ b/include/hw/xen/interface/io/console.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ * console.h
+ *
+ * Console I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+ char in[1024];
+ char out[2048];
+ XENCONS_RING_IDX in_cons, in_prod;
+ XENCONS_RING_IDX out_cons, out_prod;
+};
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h
new file mode 100644
index 0000000000..ea87ebec0a
--- /dev/null
+++ b/include/hw/xen/interface/io/fbif.h
@@ -0,0 +1,156 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+ uint8_t type; /* XENFB_TYPE_UPDATE */
+ int32_t x; /* source x */
+ int32_t y; /* source y */
+ int32_t width; /* rect width */
+ int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+ uint8_t type; /* XENFB_TYPE_RESIZE */
+ int32_t width; /* width in pixels */
+ int32_t height; /* height in pixels */
+ int32_t stride; /* stride in bytes */
+ int32_t depth; /* depth in bits */
+ int32_t offset; /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+ uint8_t type;
+ struct xenfb_update update;
+ struct xenfb_resize resize;
+ char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend their preferred period of
+ * refresh. Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it. Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodical refreshs.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+ uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */
+ uint32_t period; /* period of refresh, in ms,
+ * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+ uint8_t type;
+ struct xenfb_refresh_period refresh_period;
+ char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+ ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+ (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+ ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+ (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+
+ int32_t width; /* the width of the framebuffer (in pixels) */
+ int32_t height; /* the height of the framebuffer (in pixels) */
+ uint32_t line_length; /* the length of a row of pixels (in bytes) */
+ uint32_t mem_length; /* the length of the framebuffer (in bytes) */
+ uint8_t depth; /* the depth of a pixel (in bits) */
+
+ /*
+ * Framebuffer page directory
+ *
+ * Each directory page holds PAGE_SIZE / sizeof(*pd)
+ * framebuffer pages, and can thus map up to PAGE_SIZE *
+ * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and
+ * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+ * 64 bit. 256 directories give enough room for a 512 Meg
+ * framebuffer with a max resolution of 12,800x10,240. Should
+ * be enough for a while with room leftover for expansion.
+ */
+ unsigned long pd[256];
+};
+
+#endif
diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h
new file mode 100644
index 0000000000..1d68cd458e
--- /dev/null
+++ b/include/hw/xen/interface/io/kbdif.h
@@ -0,0 +1,566 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/*
+ *****************************************************************************
+ * Feature and Parameter Negotiation
+ *****************************************************************************
+ *
+ * The two halves of a para-virtual driver utilize nodes within
+ * XenStore to communicate capabilities and to negotiate operating parameters.
+ * This section enumerates these nodes which reside in the respective front and
+ * backend portions of XenStore, following XenBus convention.
+ *
+ * All data in XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formated node string, without loss of information.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *---------------------------- Features supported ----------------------------
+ *
+ * Capable backend advertises supported features by publishing
+ * corresponding entries in XenStore and puts 1 as the value of the entry.
+ * If a feature is not supported then 0 must be set or feature entry omitted.
+ *
+ * feature-disable-keyboard
+ * Values: <uint>
+ *
+ * If there is no need to expose a virtual keyboard device by the
+ * frontend then this must be set to 1.
+ *
+ * feature-disable-pointer
+ * Values: <uint>
+ *
+ * If there is no need to expose a virtual pointer device by the
+ * frontend then this must be set to 1.
+ *
+ * feature-abs-pointer
+ * Values: <uint>
+ *
+ * Backends, which support reporting of absolute coordinates for pointer
+ * device should set this to 1.
+ *
+ * feature-multi-touch
+ * Values: <uint>
+ *
+ * Backends, which support reporting of multi-touch events
+ * should set this to 1.
+ *
+ * feature-raw-pointer
+ * Values: <uint>
+ *
+ * Backends, which support reporting raw (unscaled) absolute coordinates
+ * for pointer devices should set this to 1. Raw (unscaled) values have
+ * a range of [0, 0x7fff].
+ *
+ *----------------------- Device Instance Parameters ------------------------
+ *
+ * unique-id
+ * Values: <string>
+ *
+ * After device instance initialization it is assigned a unique ID,
+ * so every instance of the frontend can be identified by the backend
+ * by this ID. This can be UUID or such.
+ *
+ *------------------------- Pointer Device Parameters ------------------------
+ *
+ * width
+ * Values: <uint>
+ *
+ * Maximum X coordinate (width) to be used by the frontend
+ * while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * height
+ * Values: <uint>
+ *
+ * Maximum Y coordinate (height) to be used by the frontend
+ * while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *----------------------- Multi-touch Device Parameters ----------------------
+ *
+ * multi-touch-num-contacts
+ * Values: <uint>
+ *
+ * Number of simultaneous touches reported.
+ *
+ * multi-touch-width
+ * Values: <uint>
+ *
+ * Width of the touch area to be used by the frontend
+ * while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * multi-touch-height
+ * Values: <uint>
+ *
+ * Height of the touch area to be used by the frontend
+ * while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------------------ Feature request -----------------------------
+ *
+ * Capable frontend requests features from backend via setting corresponding
+ * entries to 1 in XenStore. Requests for features not advertised as supported
+ * by the backend have no effect.
+ *
+ * request-abs-pointer
+ * Values: <uint>
+ *
+ * Request backend to report absolute pointer coordinates
+ * (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
+ *
+ * request-multi-touch
+ * Values: <uint>
+ *
+ * Request backend to report multi-touch events.
+ *
+ * request-raw-pointer
+ * Values: <uint>
+ *
+ * Request backend to report raw unscaled absolute pointer coordinates.
+ * This option is only valid if request-abs-pointer is also set.
+ * Raw unscaled coordinates have the range [0, 0x7fff]
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: <uint>
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * page-gref
+ * Values: <uint>
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * a sole page in a single page sized event ring buffer.
+ *
+ * page-ref
+ * Values: <uint>
+ *
+ * OBSOLETE, not recommended for use.
+ * PFN of the shared page.
+ */
+
+/*
+ * EVENT CODES.
+ */
+
+#define XENKBD_TYPE_MOTION 1
+#define XENKBD_TYPE_RESERVED 2
+#define XENKBD_TYPE_KEY 3
+#define XENKBD_TYPE_POS 4
+#define XENKBD_TYPE_MTOUCH 5
+
+/* Multi-touch event sub-codes */
+
+#define XENKBD_MT_EV_DOWN 0
+#define XENKBD_MT_EV_UP 1
+#define XENKBD_MT_EV_MOTION 2
+#define XENKBD_MT_EV_SYN 3
+#define XENKBD_MT_EV_SHAPE 4
+#define XENKBD_MT_EV_ORIENT 5
+
+/*
+ * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
+ */
+
+#define XENKBD_DRIVER_NAME "vkbd"
+
+#define XENKBD_FIELD_FEAT_DSBL_KEYBRD "feature-disable-keyboard"
+#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer"
+#define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer"
+#define XENKBD_FIELD_FEAT_RAW_POINTER "feature-raw-pointer"
+#define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch"
+#define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer"
+#define XENKBD_FIELD_REQ_RAW_POINTER "request-raw-pointer"
+#define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch"
+#define XENKBD_FIELD_RING_GREF "page-gref"
+#define XENKBD_FIELD_EVT_CHANNEL "event-channel"
+#define XENKBD_FIELD_WIDTH "width"
+#define XENKBD_FIELD_HEIGHT "height"
+#define XENKBD_FIELD_MT_WIDTH "multi-touch-width"
+#define XENKBD_FIELD_MT_HEIGHT "multi-touch-height"
+#define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts"
+#define XENKBD_FIELD_UNIQUE_ID "unique-id"
+
+/* OBSOLETE, not recommended for use */
+#define XENKBD_FIELD_RING_REF "page-ref"
+
+/*
+ *****************************************************************************
+ * Description of the protocol between frontend and backend driver.
+ *****************************************************************************
+ *
+ * The two halves of a Para-virtual driver communicate with
+ * each other using a shared page and an event channel.
+ * Shared page contains a ring with event structures.
+ *
+ * All reserved fields in the structures below must be 0.
+ *
+ *****************************************************************************
+ * Backend to frontend events
+ *****************************************************************************
+ *
+ * Frontends should ignore unknown in events.
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ *
+ * 0 octet
+ * +-----------------+
+ * | type |
+ * +-----------------+
+ * type - uint8_t, event code, XENKBD_TYPE_???
+ *
+ *
+ * Pointer relative movement event
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MOTION | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | rel_x | 8
+ * +----------------+----------------+----------------+----------------+
+ * | rel_y | 12
+ * +----------------+----------------+----------------+----------------+
+ * | rel_z | 16
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * rel_x - int32_t, relative X motion
+ * rel_y - int32_t, relative Y motion
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_motion
+{
+ uint8_t type;
+ int32_t rel_x;
+ int32_t rel_y;
+ int32_t rel_z;
+};
+
+/*
+ * Key event (includes pointer buttons)
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_KEY | pressed | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | keycode | 8
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 12
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * pressed - uint8_t, 1 if pressed; 0 otherwise
+ * keycode - uint32_t, KEY_* from linux/input.h
+ */
+
+struct xenkbd_key
+{
+ uint8_t type;
+ uint8_t pressed;
+ uint32_t keycode;
+};
+
+/*
+ * Pointer absolute position event
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_POS | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | abs_x | 8
+ * +----------------+----------------+----------------+----------------+
+ * | abs_y | 12
+ * +----------------+----------------+----------------+----------------+
+ * | rel_z | 16
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position (in FB pixels)
+ * abs_y - int32_t, absolute Y position (in FB pixels)
+ * rel_z - int32_t, relative Z motion (wheel)
+ */
+
+struct xenkbd_position
+{
+ uint8_t type;
+ int32_t abs_x;
+ int32_t abs_y;
+ int32_t rel_z;
+};
+
+/*
+ * Multi-touch event and its sub-types
+ *
+ * All multi-touch event packets have common header:
+ *
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | event_type | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ *
+ * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_???
+ * contact_id - unt8_t, ID of the contact
+ *
+ * Touch interactions can consist of one or more contacts.
+ * For each contact, a series of events is generated, starting
+ * with a down event, followed by zero or more motion events,
+ * and ending with an up event. Events relating to the same
+ * contact point can be identified by the ID of the sequence: contact ID.
+ * Contact ID may be reused after XENKBD_MT_EV_UP event and
+ * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range.
+ *
+ * For further information please refer to documentation on Wayland [1],
+ * Linux [2] and Windows [3] multi-touch support.
+ *
+ * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
+ * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
+ * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
+ *
+ *
+ * Multi-touch down event - sent when a new touch is made: touch is assigned
+ * a unique contact ID, sent with this and consequent events related
+ * to this touch.
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * | abs_x | 12
+ * +----------------+----------------+----------------+----------------+
+ * | abs_y | 16
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels
+ * abs_y - int32_t, absolute Y position, in pixels
+ *
+ * Multi-touch contact release event
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch motion event
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * | abs_x | 12
+ * +----------------+----------------+----------------+----------------+
+ * | abs_y | 16
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * abs_x - int32_t, absolute X position, in pixels,
+ * abs_y - int32_t, absolute Y position, in pixels,
+ *
+ * Multi-touch input synchronization event - shows end of a set of events
+ * which logically belong together.
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * Multi-touch shape event - touch point's shape has changed its shape.
+ * Shape is approximated by an ellipse through the major and minor axis
+ * lengths: major is the longer diameter of the ellipse and minor is the
+ * shorter one. Center of the ellipse is reported via
+ * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * | major | 12
+ * +----------------+----------------+----------------+----------------+
+ * | minor | 16
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 20
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * major - unt32_t, length of the major axis, pixels
+ * minor - unt32_t, length of the minor axis, pixels
+ *
+ * Multi-touch orientation event - touch point's shape has changed
+ * its orientation: calculated as a clockwise angle between the major axis
+ * of the ellipse and positive Y axis in degrees, [-180; +180].
+ * 0 1 2 3 octet
+ * +----------------+----------------+----------------+----------------+
+ * | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 8
+ * +----------------+----------------+----------------+----------------+
+ * | orientation | reserved | 12
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 16
+ * +----------------+----------------+----------------+----------------+
+ * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
+ * +----------------+----------------+----------------+----------------+
+ * | reserved | 40
+ * +----------------+----------------+----------------+----------------+
+ *
+ * orientation - int16_t, clockwise angle of the major axis
+ */
+
+struct xenkbd_mtouch {
+ uint8_t type; /* XENKBD_TYPE_MTOUCH */
+ uint8_t event_type; /* XENKBD_MT_EV_??? */
+ uint8_t contact_id;
+ uint8_t reserved[5]; /* reserved for the future use */
+ union {
+ struct {
+ int32_t abs_x; /* absolute X position, pixels */
+ int32_t abs_y; /* absolute Y position, pixels */
+ } pos;
+ struct {
+ uint32_t major; /* length of the major axis, pixels */
+ uint32_t minor; /* length of the minor axis, pixels */
+ } shape;
+ int16_t orientation; /* clockwise angle of the major axis */
+ } u;
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+ uint8_t type;
+ struct xenkbd_motion motion;
+ struct xenkbd_key key;
+ struct xenkbd_position pos;
+ struct xenkbd_mtouch mtouch;
+ char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ * Frontend to backend events
+ *****************************************************************************
+ *
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+
+ * All event packets have the same length (40 octets)
+ * All event packets have common header:
+ * 0 octet
+ * +-----------------+
+ * | type |
+ * +-----------------+
+ * type - uint8_t, event code
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+ uint8_t type;
+ char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/*
+ *****************************************************************************
+ * Shared page
+ *****************************************************************************
+ */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+};
+
+#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h
new file mode 100644
index 0000000000..48fa530950
--- /dev/null
+++ b/include/hw/xen/interface/io/netif.h
@@ -0,0 +1,1010 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Older implementation of Xen network frontend / backend has an
+ * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
+ * ring slots a skb can use. Netfront / netback may not work as
+ * expected when frontend and backend have different MAX_SKB_FRAGS.
+ *
+ * A better approach is to add mechanism for netfront / netback to
+ * negotiate this value. However we cannot fix all possible
+ * frontends, so we need to define a value which states the minimum
+ * slots backend must support.
+ *
+ * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
+ * (18), which is proved to work with most frontends. Any new backend
+ * which doesn't negotiate with frontend should expect frontend to
+ * send a valid packet using slots up to this value.
+ */
+#define XEN_NETIF_NR_SLOTS_MIN 18
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * "feature-split-event-channels" is introduced to separate guest TX
+ * and RX notification. Backend either doesn't support this feature or
+ * advertises it via xenstore as 0 (disabled) or 1 (enabled).
+ *
+ * To make use of this feature, frontend should allocate two event
+ * channels for TX and RX, advertise them to backend as
+ * "event-channel-tx" and "event-channel-rx" respectively. If frontend
+ * doesn't want to use this feature, it just writes "event-channel"
+ * node as before.
+ */
+
+/*
+ * Multiple transmit and receive queues:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vif, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues", set to the number they wish to use, which
+ * must be greater than zero, and no more than the value reported by the backend
+ * in "multi-queue-max-queues".
+ *
+ * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using
+ * multiple queues, but is not mandatory.
+ *
+ * Each queue consists of one shared ring pair, i.e. there must be the same
+ * number of tx and rx rings.
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
+ * instead writing those keys under sub-keys having the name "queue-N" where
+ * N is the integer ID of the queue for which those keys belong. Queues
+ * are indexed from zero. For example, a frontend with two queues and split
+ * event channels must write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
+ *
+ * If there is any inconsistency in the XenStore data, the backend may
+ * choose not to connect any queues, instead treating the request as an
+ * error. This includes scenarios where more (or fewer) queues were
+ * requested than the frontend provided details for.
+ *
+ * Mapping of packets to queues is considered to be a function of the
+ * transmitting system (backend or frontend) and is not negotiated
+ * between the two. Guests are free to transmit packets on any queue
+ * they choose, provided it has been set up correctly. Guests must be
+ * prepared to receive packets on any queue they have requested be set up.
+ */
+
+/*
+ * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
+ * offload off or on. If it is missing then the feature is assumed to be on.
+ * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
+ * offload on or off. If it is missing then the feature is assumed to be off.
+ */
+
+/*
+ * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
+ * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
+ * frontends nor backends are assumed to be capable unless the flags are
+ * present.
+ */
+
+/*
+ * "feature-multicast-control" and "feature-dynamic-multicast-control"
+ * advertise the capability to filter ethernet multicast packets in the
+ * backend. If the frontend wishes to take advantage of this feature then
+ * it may set "request-multicast-control". If the backend only advertises
+ * "feature-multicast-control" then "request-multicast-control" must be set
+ * before the frontend moves into the connected state. The backend will
+ * sample the value on this state transition and any subsequent change in
+ * value will have no effect. However, if the backend also advertises
+ * "feature-dynamic-multicast-control" then "request-multicast-control"
+ * may be set by the frontend at any time. In this case, the backend will
+ * watch the value and re-sample on watch events.
+ *
+ * If the sampled value of "request-multicast-control" is set then the
+ * backend transmit side should no longer flood multicast packets to the
+ * frontend, it should instead drop any multicast packet that does not
+ * match in a filter list.
+ * The list is amended by the frontend by sending dummy transmit requests
+ * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
+ * specified below.
+ * Note that the filter list may be amended even if the sampled value of
+ * "request-multicast-control" is not set, however the filter should only
+ * be applied if it is set.
+ */
+
+/*
+ * Control ring
+ * ============
+ *
+ * Some features, such as hashing (detailed below), require a
+ * significant amount of out-of-band data to be passed from frontend to
+ * backend. Use of xenstore is not suitable for large quantities of data
+ * because of quota limitations and so a dedicated 'control ring' is used.
+ * The ability of the backend to use a control ring is advertised by
+ * setting:
+ *
+ * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1"
+ *
+ * The frontend provides a control ring to the backend by setting:
+ *
+ * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
+ * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
+ *
+ * where <gref> is the grant reference of the shared page used to
+ * implement the control ring and <port> is an event channel to be used
+ * as a mailbox interrupt. These keys must be set before the frontend
+ * moves into the connected state.
+ *
+ * The control ring uses a fixed request/response message size and is
+ * balanced (i.e. one request to one response), so operationally it is much
+ * the same as a transmit or receive ring.
+ * Note that there is no requirement that responses are issued in the same
+ * order as requests.
+ */
+
+/*
+ * Hash types
+ * ==========
+ *
+ * For the purposes of the definitions below, 'Packet[]' is an array of
+ * octets containing an IP packet without options, 'Array[X..Y]' means a
+ * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
+ * used to indicate concatenation of arrays.
+ */
+
+/*
+ * A hash calculated over an IP version 4 header as follows:
+ *
+ * Buffer[0..8] = Packet[12..15] (source address) +
+ * Packet[16..19] (destination address)
+ *
+ * Result = Hash(Buffer, 8)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
+ (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
+
+/*
+ * A hash calculated over an IP version 4 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..12] = Packet[12..15] (source address) +
+ * Packet[16..19] (destination address) +
+ * Packet[20..21] (source port) +
+ * Packet[22..23] (destination port)
+ *
+ * Result = Hash(Buffer, 12)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
+ (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
+
+/*
+ * A hash calculated over an IP version 6 header as follows:
+ *
+ * Buffer[0..32] = Packet[8..23] (source address ) +
+ * Packet[24..39] (destination address)
+ *
+ * Result = Hash(Buffer, 32)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
+ (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
+
+/*
+ * A hash calculated over an IP version 6 header and TCP header as
+ * follows:
+ *
+ * Buffer[0..36] = Packet[8..23] (source address) +
+ * Packet[24..39] (destination address) +
+ * Packet[40..41] (source port) +
+ * Packet[42..43] (destination port)
+ *
+ * Result = Hash(Buffer, 36)
+ */
+#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
+#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
+ (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
+
+/*
+ * Hash algorithms
+ * ===============
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0
+
+/*
+ * Toeplitz hash:
+ */
+
+#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
+
+/*
+ * Control requests (struct xen_netif_ctrl_request)
+ * ================================================
+ *
+ * All requests have the following format:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | type | data[0] |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | data[1] | data[2] |
+ * +-----+-----+-----+-----+-----------------------+
+ *
+ * id: the request identifier, echoed in response.
+ * type: the type of request (see below)
+ * data[]: any data associated with the request (determined by type)
+ */
+
+struct xen_netif_ctrl_request {
+ uint16_t id;
+ uint16_t type;
+
+#define XEN_NETIF_CTRL_TYPE_INVALID 0
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3
+#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6
+#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7
+#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8
+#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING 9
+#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING 10
+
+ uint32_t data[3];
+};
+
+/*
+ * Control responses (struct xen_netif_ctrl_response)
+ * ==================================================
+ *
+ * All responses have the following format:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | type | status |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | data |
+ * +-----+-----+-----+-----+
+ *
+ * id: the corresponding request identifier
+ * type: the type of the corresponding request
+ * status: the status of request processing
+ * data: any data associated with the response (determined by type and
+ * status)
+ */
+
+struct xen_netif_ctrl_response {
+ uint16_t id;
+ uint16_t type;
+ uint32_t status;
+
+#define XEN_NETIF_CTRL_STATUS_SUCCESS 0
+#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1
+#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
+#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3
+
+ uint32_t data;
+};
+
+/*
+ * Static Grants (struct xen_netif_gref)
+ * =====================================
+ *
+ * A frontend may provide a fixed set of grant references to be mapped on
+ * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * prior its usage in the command ring allows for creation of these mappings.
+ * The backend will maintain a fixed amount of these mappings.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many
+ * of these mappings can be kept.
+ *
+ * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has
+ * the following format:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref | flags | status |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * grant ref: grant reference (IN)
+ * flags: flags describing the control operation (IN)
+ * status: XEN_NETIF_CTRL_STATUS_* (OUT)
+ *
+ * 'status' is an output parameter which does not require to be set to zero
+ * prior to its usage in the corresponding control messages.
+ */
+
+struct xen_netif_gref {
+ grant_ref_t ref;
+ uint16_t flags;
+
+#define _XEN_NETIF_CTRLF_GREF_readonly 0
+#define XEN_NETIF_CTRLF_GREF_readonly (1U<<_XEN_NETIF_CTRLF_GREF_readonly)
+
+ uint16_t status;
+};
+
+/*
+ * Control messages
+ * ================
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ * --------------------------------------
+ *
+ * This is sent by the frontend to set the desired hash algorithm.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
+ * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ *
+ * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
+ * hashing and the backend is free to choose how it steers packets
+ * to queues (which is the default behaviour).
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to query the types of hash supported by
+ * the backend.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
+ * data[0] = 0
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = supported hash types (if operation was successful)
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ * succeed.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ * ----------------------------------
+ *
+ * This is sent by the frontend to set the types of hash that the backend
+ * should calculate. (See above for hash type definitions).
+ * Note that the 'maximal' type of hash should always be chosen. For
+ * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
+ * the latter hash type should be calculated for any TCP packet and the
+ * former only calculated for non-TCP packets.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
+ * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
+ * value is invalid or
+ * unsupported
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = 0
+ *
+ * NOTE: A valid hash algorithm must be selected before this operation can
+ * succeed.
+ * Also, setting data[0] to zero disables hashing and the backend
+ * is free to choose how it steers packets to queues.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ * --------------------------------
+ *
+ * This is sent by the frontend to set the key of the hash if the algorithm
+ * requires it. (See hash algorithms above).
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
+ * data[0] = grant reference of page containing the key (assumed to
+ * start at beginning of grant)
+ * data[1] = size of key in octets
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
+ * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger
+ * than the backend
+ * supports
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = 0
+ *
+ * NOTE: Any key octets not specified are assumed to be zero (the key
+ * is assumed to be empty by default) and specifying a new key
+ * invalidates any previous key, hence specifying a key size of
+ * zero will clear the key (which ensures that the calculated hash
+ * will always be zero).
+ * The maximum size of key is algorithm and backend specific, but
+ * is also limited by the single grant reference.
+ * The grant reference may be read-only and must remain valid until
+ * the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to query the maximum size of mapping
+ * table supported by the backend. The size is specified in terms of
+ * table entries.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
+ * data[0] = 0
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = maximum number of entries allowed in the mapping table
+ * (if operation was successful) or zero if a mapping table is
+ * not supported (i.e. hash mapping is done only by modular
+ * arithmetic).
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ * -------------------------------------
+ *
+ * This is sent by the frontend to set the actual size of the mapping
+ * table to be used by the backend. The size is specified in terms of
+ * table entries.
+ * Any previous table is invalidated by this message and any new table
+ * is assumed to be zero filled.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ * data[0] = number of entries in mapping table
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = 0
+ *
+ * NOTE: Setting data[0] to 0 means that hash mapping should be done
+ * using modular arithmetic.
+ *
+ * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend to set the content of the table mapping
+ * hash value to queue number. The backend should calculate the hash from
+ * the packet header, use it as an index into the table (modulo the size
+ * of the table) and then steer the packet to the queue number found at
+ * that index.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
+ * data[0] = grant reference of page containing the mapping (sub-)table
+ * (assumed to start at beginning of grant)
+ * data[1] = size of (sub-)table in entries
+ * data[2] = offset, in entries, of sub-table within overall table
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
+ * is invalid
+ * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger
+ * than the backend
+ * supports
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = 0
+ *
+ * NOTE: The overall table has the following format:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | mapping[0] | mapping[1] |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | . |
+ * | . |
+ * | . |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | mapping[N-2] | mapping[N-1] |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
+ * message and each mapping must specifies a queue between 0 and
+ * "multi-queue-num-queues" (see above).
+ * The backend may support a mapping table larger than can be
+ * mapped by a single grant reference. Thus sub-tables within a
+ * larger table can be individually set by sending multiple messages
+ * with differing offset values. Specifying a new sub-table does not
+ * invalidate any table data outside that range.
+ * The grant reference may be read-only and must remain valid until
+ * the response has been processed.
+ *
+ * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ * -----------------------------------------
+ *
+ * This is sent by the frontend to fetch the number of grefs that can be kept
+ * mapped in the backend.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
+ * data[0] = queue index (assumed 0 for single queue)
+ * data[1] = 0
+ * data[2] = 0
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is
+ * out of range
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = maximum number of entries allowed in the gref mapping table
+ * (if operation was successful) or zero if it is not supported.
+ *
+ * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to map a list of grant
+ * references.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * data[0] = queue index
+ * data[1] = grant reference of page containing the mapping list
+ * (r/w and assumed to start at beginning of page)
+ * data[2] = size of list in entries
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ *
+ * NOTE: Each entry in the input table has the format outlined
+ * in struct xen_netif_gref.
+ * Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct
+ * xen_netif_gref 'status' field is not used and therefore the response
+ * 'status' determines the success of this operation. In case of
+ * failure none of grants mappings get added in the backend.
+ *
+ * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ * ------------------------------------
+ *
+ * This is sent by the frontend for backend to unmap a list of grant
+ * references.
+ *
+ * Request:
+ *
+ * type = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
+ * data[0] = queue index
+ * data[1] = grant reference of page containing the mapping list
+ * (r/w and assumed to start at beginning of page)
+ * data[2] = size of list in entries
+ *
+ * Response:
+ *
+ * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not
+ * supported
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
+ * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful
+ * data = number of entries that were unmapped
+ *
+ * NOTE: Each entry in the input table has the format outlined in struct
+ * xen_netif_gref.
+ * The struct xen_netif_gref 'status' field determines if the entry
+ * was successfully removed.
+ * The entries used are only the ones representing grant references that
+ * were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
+ * operation. Any other entries will have their status set to
+ * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion.
+ */
+
+DEFINE_RING_TYPES(xen_netif_ctrl,
+ struct xen_netif_ctrl_request,
+ struct xen_netif_ctrl_response);
+
+/*
+ * Guest transmit
+ * ==============
+ *
+ * This is the 'wire' format for transmit (frontend -> backend) packets:
+ *
+ * Fragment 1: netif_tx_request_t - flags = NETTXF_*
+ * size = total packet size
+ * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include
+ * NETTXF_extra_info)
+ * ...
+ * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include
+ * XEN_NETIF_EXTRA_MORE)
+ * ...
+ * Fragment N: netif_tx_request_t - (only if fragment N-1 flags include
+ * NETTXF_more_data - flags on preceding
+ * extras are not relevant here)
+ * flags = 0
+ * size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for receive
+ * (backend -> frontend) packets. Specifically, in a multi-fragment
+ * packet the actual size of fragment 1 can only be determined by
+ * subtracting the sizes of fragments 2..N from the total packet size.
+ *
+ * Ring slot size is 12 octets, however not all request/response
+ * structs use the full size.
+ *
+ * tx request data (netif_tx_request_t)
+ * ------------------------------------
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | grant ref | offset | flags |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | size |
+ * +-----+-----+-----+-----+
+ *
+ * grant ref: Reference to buffer page.
+ * offset: Offset within buffer page.
+ * flags: NETTXF_*.
+ * id: request identifier, echoed in response.
+ * size: packet size in bytes.
+ *
+ * tx response (netif_tx_response_t)
+ * ---------------------------------
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | status | unused |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | unused |
+ * +-----+-----+-----+-----+
+ *
+ * id: reflects id in transmit request
+ * status: NETIF_RSP_*
+ *
+ * Guest receive
+ * =============
+ *
+ * This is the 'wire' format for receive (backend -> frontend) packets:
+ *
+ * Fragment 1: netif_rx_request_t - flags = NETRXF_*
+ * size = fragment size
+ * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include
+ * NETRXF_extra_info)
+ * ...
+ * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include
+ * XEN_NETIF_EXTRA_MORE)
+ * ...
+ * Fragment N: netif_rx_request_t - (only if fragment N-1 flags include
+ * NETRXF_more_data - flags on preceding
+ * extras are not relevant here)
+ * flags = 0
+ * size = fragment size
+ *
+ * NOTE:
+ *
+ * This format slightly is different from that used for transmit
+ * (frontend -> backend) packets. Specifically, in a multi-fragment
+ * packet the size of the packet can only be determined by summing the
+ * sizes of fragments 1..N.
+ *
+ * Ring slot size is 8 octets.
+ *
+ * rx request (netif_rx_request_t)
+ * -------------------------------
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | pad | gref |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: request identifier, echoed in response.
+ * gref: reference to incoming granted frame.
+ *
+ * rx response (netif_rx_response_t)
+ * ---------------------------------
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | id | offset | flags | status |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * id: reflects id in receive request
+ * offset: offset in page of start of received packet
+ * flags: NETRXF_*
+ * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
+ *
+ * NOTE: Historically, to support GSO on the frontend receive side, Linux
+ * netfront does not make use of the rx response id (because, as
+ * described below, extra info structures overlay the id field).
+ * Instead it assumes that responses always appear in the same ring
+ * slot as their corresponding request. Thus, to maintain
+ * compatibility, backends must make sure this is the case.
+ *
+ * Extra Info
+ * ==========
+ *
+ * Can be present if initial request or response has NET{T,R}XF_extra_info,
+ * or previous extra request has XEN_NETIF_EXTRA_MORE.
+ *
+ * The struct therefore needs to fit into either a tx or rx slot and
+ * is therefore limited to 8 octets.
+ *
+ * NOTE: Because extra info data overlays the usual request/response
+ * structures, there is no id information in the opposite direction.
+ * So, if an extra info overlays an rx response the frontend can
+ * assume that it is in the same ring slot as the request that was
+ * consumed to make the slot available, and the backend must ensure
+ * this assumption is true.
+ *
+ * extra info (netif_extra_info_t)
+ * -------------------------------
+ *
+ * General format:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| type specific data |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * | padding for tx |
+ * +-----+-----+-----+-----+
+ *
+ * type: XEN_NETIF_EXTRA_TYPE_*
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * padding for tx: present only in the tx case due to 8 octet limit
+ * from rx case. Not shown in type specific entries
+ * below.
+ *
+ * XEN_NETIF_EXTRA_TYPE_GSO:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| size |type | pad | features |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * size: Maximum payload size of each segment. For example,
+ * for TCP this is just the path MSS.
+ * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
+ * the packet and any extra features required to segment the
+ * packet properly.
+ * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO
+ * features required to process this packet, such as ECN
+ * support for TCPv4.
+ *
+ * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags| addr |
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * addr: address to add/remove
+ *
+ * XEN_NETIF_EXTRA_TYPE_HASH:
+ *
+ * A backend that supports teoplitz hashing is assumed to accept
+ * this type of extra info in transmit packets.
+ * A frontend that enables hashing is assumed to accept
+ * this type of extra info in receive packets.
+ *
+ * 0 1 2 3 4 5 6 7 octet
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ * |type |flags|htype| alg |LSB ---- value ---- MSB|
+ * +-----+-----+-----+-----+-----+-----+-----+-----+
+ *
+ * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
+ * flags: XEN_NETIF_EXTRA_FLAG_*
+ * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
+ * alg: The algorithm used to calculate the hash (one of
+ * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
+ * value: Hash value
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank (0)
+#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data (2)
+#define NETTXF_more_data (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info (3)
+#define NETTXF_extra_info (1U<<_NETTXF_extra_info)
+
+#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
+struct netif_tx_request {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t flags;
+ uint16_t id;
+ uint16_t size;
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */
+#define XEN_NETIF_EXTRA_TYPE_MAX (5)
+
+/* netif_extra_info_t flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types */
+#define XEN_NETIF_GSO_TYPE_NONE (0)
+#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
+#define XEN_NETIF_GSO_TYPE_TCPV6 (2)
+
+/*
+ * This structure needs to fit within both netif_tx_request_t and
+ * netif_rx_response_t for compatibility.
+ */
+struct netif_extra_info {
+ uint8_t type;
+ uint8_t flags;
+ union {
+ struct {
+ uint16_t size;
+ uint8_t type;
+ uint8_t pad;
+ uint16_t features;
+ } gso;
+ struct {
+ uint8_t addr[6];
+ } mcast;
+ struct {
+ uint8_t type;
+ uint8_t algorithm;
+ uint8_t value[4];
+ } hash;
+ uint16_t pad[3];
+ } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+ uint16_t id;
+ int16_t status;
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+ uint16_t id; /* Echoed in response message. */
+ uint16_t pad;
+ grant_ref_t gref;
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank (1)
+#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data (2)
+#define NETRXF_more_data (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info (3)
+#define NETRXF_extra_info (1U<<_NETRXF_extra_info)
+
+/* Packet has GSO prefix. Deprecated but included for compatibility */
+#define _NETRXF_gso_prefix (4)
+#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix)
+
+struct netif_rx_response {
+ uint16_t id;
+ uint16_t offset;
+ uint16_t flags;
+ int16_t status;
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED -2
+#define NETIF_RSP_ERROR -1
+#define NETIF_RSP_OKAY 0
+/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
+#define NETIF_RSP_NULL 1
+
+#endif
diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h
new file mode 100644
index 0000000000..52b4de0f81
--- /dev/null
+++ b/include/hw/xen/interface/io/protocols.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2008, Keir Fraser
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/hw/xen/io/ring.h b/include/hw/xen/interface/io/ring.h
index 62abfd7a6e..1adacf09f9 100644
--- a/include/hw/xen/io/ring.h
+++ b/include/hw/xen/interface/io/ring.h
@@ -24,8 +24,8 @@
* Tim Deegan and Andrew Warfield November 2004.
*/
-#ifndef XEN_PUBLIC_IO_RING_H
-#define XEN_PUBLIC_IO_RING_H
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
/*
* When #include'ing this header, you need to provide the following
@@ -469,7 +469,7 @@ struct name##_data_intf { \
}; \
DEFINE_XEN_FLEX_RING(name)
-#endif /* XEN_PUBLIC_IO_RING_H */
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
/*
* Local variables:
diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h
new file mode 100644
index 0000000000..c6a58639d6
--- /dev/null
+++ b/include/hw/xen/interface/io/usbif.h
@@ -0,0 +1,254 @@
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ *****************************************************************************
+ * Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * num-ports
+ * Values: unsigned [1...31]
+ *
+ * Number of ports for this (virtual) USB host connector.
+ *
+ * usb-ver
+ * Values: unsigned [1...2]
+ *
+ * USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
+ *
+ * port/[1...31]
+ * Values: string
+ *
+ * Physical USB device connected to the given port, e.g. "3-1.5".
+ *
+ *****************************************************************************
+ * Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ * Values: unsigned
+ *
+ * The identifier of the Xen event channel used to signal activity
+ * in the ring buffer.
+ *
+ * urb-ring-ref
+ * Values: unsigned
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer. This is the ring
+ * buffer for urb requests.
+ *
+ * conn-ring-ref
+ * Values: unsigned
+ *
+ * The Xen grant reference granting permission for the backend to map
+ * the sole page in a single page sized ring buffer. This is the ring
+ * buffer for connection/disconnection requests.
+ *
+ * protocol
+ * Values: string (XEN_IO_PROTO_ABI_*)
+ * Default Value: XEN_IO_PROTO_ABI_NATIVE
+ *
+ * The machine ABI rules governing the format of all ring request and
+ * response structures.
+ *
+ */
+
+enum usb_spec_version {
+ USB_VER_UNKNOWN = 0,
+ USB_VER_USB11,
+ USB_VER_USB20,
+ USB_VER_USB30, /* not supported yet */
+};
+
+/*
+ * USB pipe in usbif_request
+ *
+ * - port number: bits 0-4
+ * (USB_MAXCHILDREN is 31)
+ *
+ * - operation flag: bit 5
+ * (0 = submit urb,
+ * 1 = unlink urb)
+ *
+ * - direction: bit 7
+ * (0 = Host-to-Device [Out]
+ * 1 = Device-to-Host [In])
+ *
+ * - device address: bits 8-14
+ *
+ * - endpoint: bits 15-18
+ *
+ * - pipe type: bits 30-31
+ * (00 = isochronous, 01 = interrupt,
+ * 10 = control, 11 = bulk)
+ */
+
+#define USBIF_PIPE_PORT_MASK 0x0000001f
+#define USBIF_PIPE_UNLINK 0x00000020
+#define USBIF_PIPE_DIR 0x00000080
+#define USBIF_PIPE_DEV_MASK 0x0000007f
+#define USBIF_PIPE_DEV_SHIFT 8
+#define USBIF_PIPE_EP_MASK 0x0000000f
+#define USBIF_PIPE_EP_SHIFT 15
+#define USBIF_PIPE_TYPE_MASK 0x00000003
+#define USBIF_PIPE_TYPE_SHIFT 30
+#define USBIF_PIPE_TYPE_ISOC 0
+#define USBIF_PIPE_TYPE_INT 1
+#define USBIF_PIPE_TYPE_CTRL 2
+#define USBIF_PIPE_TYPE_BULK 3
+
+#define usbif_pipeportnum(pipe) ((pipe) & USBIF_PIPE_PORT_MASK)
+#define usbif_setportnum_pipe(pipe, portnum) ((pipe) | (portnum))
+
+#define usbif_pipeunlink(pipe) ((pipe) & USBIF_PIPE_UNLINK)
+#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe))
+#define usbif_setunlink_pipe(pipe) ((pipe) | USBIF_PIPE_UNLINK)
+
+#define usbif_pipein(pipe) ((pipe) & USBIF_PIPE_DIR)
+#define usbif_pipeout(pipe) (!usbif_pipein(pipe))
+
+#define usbif_pipedevice(pipe) \
+ (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
+
+#define usbif_pipeendpoint(pipe) \
+ (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
+
+#define usbif_pipetype(pipe) \
+ (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
+#define usbif_pipeisoc(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
+#define usbif_pipeint(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
+#define usbif_pipectrl(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
+#define usbif_pipebulk(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
+
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
+#define USBIF_MAX_PORTNR 31
+#define USBIF_RING_SIZE 4096
+
+/*
+ * RING for transferring urbs.
+ */
+struct usbif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+};
+
+struct usbif_urb_request {
+ uint16_t id; /* request id */
+ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
+
+ /* basic urb parameter */
+ uint32_t pipe;
+ uint16_t transfer_flags;
+#define USBIF_SHORT_NOT_OK 0x0001
+ uint16_t buffer_length;
+ union {
+ uint8_t ctrl[8]; /* setup_packet (Ctrl) */
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t start_frame; /* start frame */
+ uint16_t number_of_packets; /* number of ISO packet */
+ uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+ } isoc;
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t pad[3];
+ } intr;
+
+ struct {
+ uint16_t unlink_id; /* unlink request id */
+ uint16_t pad[3];
+ } unlink;
+
+ } u;
+
+ /* urb data segments */
+ struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_urb_request usbif_urb_request_t;
+
+struct usbif_urb_response {
+ uint16_t id; /* request id */
+ uint16_t start_frame; /* start frame (ISO) */
+ int32_t status; /* status (non-ISO) */
+ int32_t actual_length; /* actual transfer length */
+ int32_t error_count; /* number of ISO errors */
+};
+typedef struct usbif_urb_response usbif_urb_response_t;
+
+DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
+#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
+
+/*
+ * RING for notifying connect/disconnect events to frontend
+ */
+struct usbif_conn_request {
+ uint16_t id;
+};
+typedef struct usbif_conn_request usbif_conn_request_t;
+
+struct usbif_conn_response {
+ uint16_t id; /* request id */
+ uint8_t portnum; /* port number */
+ uint8_t speed; /* usb_device_speed */
+#define USBIF_SPEED_NONE 0
+#define USBIF_SPEED_LOW 1
+#define USBIF_SPEED_FULL 2
+#define USBIF_SPEED_HIGH 3
+};
+typedef struct usbif_conn_response usbif_conn_response_t;
+
+DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
+#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h
new file mode 100644
index 0000000000..2fbf2a7fdc
--- /dev/null
+++ b/include/hw/xen/interface/io/xenbus.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus. States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+ XenbusStateUnknown = 0,
+
+ XenbusStateInitialising = 1,
+
+ /*
+ * InitWait: Finished early initialisation but waiting for information
+ * from the peer or hotplug scripts.
+ */
+ XenbusStateInitWait = 2,
+
+ /*
+ * Initialised: Waiting for a connection from the peer.
+ */
+ XenbusStateInitialised = 3,
+
+ XenbusStateConnected = 4,
+
+ /*
+ * Closing: The device is being closed due to an error or an unplug event.
+ */
+ XenbusStateClosing = 5,
+
+ XenbusStateClosed = 6,
+
+ /*
+ * Reconfiguring: The device is being reconfigured.
+ */
+ XenbusStateReconfiguring = 7,
+
+ XenbusStateReconfigured = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 3183f10e3c..1c2d9dfdb8 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -15,6 +15,7 @@
typedef void (*XenWatchHandler)(void *opaque);
typedef struct XenWatch XenWatch;
+typedef struct XenEventChannel XenEventChannel;
typedef struct XenDevice {
DeviceState qdev;
@@ -28,8 +29,7 @@ typedef struct XenDevice {
XenWatch *backend_online_watch;
xengnttab_handle *xgth;
bool feature_grant_copy;
- xenevtchn_handle *xeh;
- NotifierList event_notifiers;
+ QLIST_HEAD(, XenEventChannel) event_channels;
} XenDevice;
typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp);
@@ -119,11 +119,10 @@ void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
XenDeviceGrantCopySegment segs[],
unsigned int nr_segs, Error **errp);
-typedef struct XenEventChannel XenEventChannel;
-
-typedef void (*XenEventHandler)(void *opaque);
+typedef bool (*XenEventHandler)(void *opaque);
XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
+ AioContext *ctx,
unsigned int port,
XenEventHandler handler,
void *opaque, Error **errp);
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 9a8155e172..0504b43659 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -12,7 +12,7 @@
#include <xenctrl.h>
#include <xenstore.h>
-#include <xen/io/xenbus.h>
+#include "hw/xen/interface/io/xenbus.h"
#include "hw/hw.h"
#include "hw/xen/xen.h"
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
index dd021f2df0..0a4970c068 100644
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
@@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
mxe-$TARGET-w64-mingw32.shared-curl \
mxe-$TARGET-w64-mingw32.shared-glib \
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
mxe-$TARGET-w64-mingw32.shared-libusb1 \
mxe-$TARGET-w64-mingw32.shared-lzo \
mxe-$TARGET-w64-mingw32.shared-nettle \
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
index 4542bcc821..b27985b1b1 100644
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
@@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
mxe-$TARGET-w64-mingw32.shared-curl \
mxe-$TARGET-w64-mingw32.shared-glib \
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
mxe-$TARGET-w64-mingw32.shared-libusb1 \
mxe-$TARGET-w64-mingw32.shared-lzo \
mxe-$TARGET-w64-mingw32.shared-nettle \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index 12c460597e..619d1b5656 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -35,7 +35,7 @@ ENV PACKAGES \
libpng-devel \
librbd-devel \
libseccomp-devel \
- libssh2-devel \
+ libssh-devel \
libubsan \
libusbx-devel \
libxml2-devel \
@@ -50,7 +50,6 @@ ENV PACKAGES \
mingw32-gtk3 \
mingw32-libjpeg-turbo \
mingw32-libpng \
- mingw32-libssh2 \
mingw32-libtasn1 \
mingw32-nettle \
mingw32-pixman \
@@ -64,7 +63,6 @@ ENV PACKAGES \
mingw64-gtk3 \
mingw64-libjpeg-turbo \
mingw64-libpng \
- mingw64-libssh2 \
mingw64-libtasn1 \
mingw64-nettle \
mingw64-pixman \
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
index 8d256961f0..d3b72209c8 100644
--- a/tests/docker/dockerfiles/ubuntu.docker
+++ b/tests/docker/dockerfiles/ubuntu.docker
@@ -53,7 +53,7 @@ ENV PACKAGES flex bison \
libsnappy-dev \
libspice-protocol-dev \
libspice-server-dev \
- libssh2-1-dev \
+ libssh-dev \
libusb-1.0-0-dev \
libusbredirhost-dev \
libvdeplug-dev \
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
index 2e2900150b..9d80b11500 100644
--- a/tests/docker/dockerfiles/ubuntu1804.docker
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -40,7 +40,7 @@ ENV PACKAGES flex bison \
libsnappy-dev \
libspice-protocol-dev \
libspice-server-dev \
- libssh2-1-dev \
+ libssh-dev \
libusb-1.0-0-dev \
libusbredirhost-dev \
libvdeplug-dev \
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index f51394ae8e..4fab42a28c 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2358,5 +2358,5 @@ Offset Length Mapped to File
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
=== Testing afl image with a very large capacity ===
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
*** done
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
index 141a2eaa7e..5f0fb86211 100755
--- a/tests/qemu-iotests/134
+++ b/tests/qemu-iotests/134
@@ -57,6 +57,15 @@ echo "== reading whole image =="
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
echo
+echo "== rewriting cluster part =="
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
+echo "== verify pattern =="
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
+
+echo
echo "== rewriting whole image =="
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
index 972be49d91..09d46f6b17 100644
--- a/tests/qemu-iotests/134.out
+++ b/tests/qemu-iotests/134.out
@@ -5,6 +5,16 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
read 134217728/134217728 bytes at offset 0
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+== rewriting cluster part ==
+wrote 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+== verify pattern ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 512
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
== rewriting whole image ==
wrote 134217728/134217728 bytes at offset 0
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
index 69f2c1d392..b8a86c446e 100755
--- a/tests/qemu-iotests/205
+++ b/tests/qemu-iotests/205
@@ -24,7 +24,7 @@ import iotests
import time
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
-nbd_sock = 'nbd_sock'
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
disk = os.path.join(iotests.test_dir, 'disk')
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
index b3816136f7..ec8c1d06f0 100755
--- a/tests/qemu-iotests/207
+++ b/tests/qemu-iotests/207
@@ -110,12 +110,49 @@ with iotests.FilePath('t.img') as disk_path, \
iotests.img_info_log(remote_path)
- md5_key = subprocess.check_output(
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
- shell=True).rstrip().decode('ascii')
+ keys = subprocess.check_output(
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
+ 'cut -d" " -f3',
+ shell=True).rstrip().decode('ascii').split('\n')
+
+ # Mappings of base64 representations to digests
+ md5_keys = {}
+ sha1_keys = {}
+
+ for key in keys:
+ md5_keys[key] = subprocess.check_output(
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
+ shell=True).rstrip().decode('ascii')
+
+ sha1_keys[key] = subprocess.check_output(
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
+ shell=True).rstrip().decode('ascii')
vm.launch()
+
+ # Find correct key first
+ matching_key = None
+ for key in keys:
+ result = vm.qmp('blockdev-add',
+ driver='ssh', node_name='node0', path=disk_path,
+ server={
+ 'host': '127.0.0.1',
+ 'port': '22',
+ }, host_key_check={
+ 'mode': 'hash',
+ 'type': 'md5',
+ 'hash': md5_keys[key],
+ })
+
+ if 'error' not in result:
+ vm.qmp('blockdev-del', node_name='node0')
+ matching_key = key
+ break
+
+ if matching_key is None:
+ vm.shutdown()
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
+
blockdev_create(vm, { 'driver': 'ssh',
'location': {
'path': disk_path,
@@ -140,7 +177,7 @@ with iotests.FilePath('t.img') as disk_path, \
'host-key-check': {
'mode': 'hash',
'type': 'md5',
- 'hash': md5_key,
+ 'hash': md5_keys[matching_key],
}
},
'size': 8388608 })
@@ -148,11 +185,6 @@ with iotests.FilePath('t.img') as disk_path, \
iotests.img_info_log(remote_path)
- sha1_key = subprocess.check_output(
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
- shell=True).rstrip().decode('ascii')
-
vm.launch()
blockdev_create(vm, { 'driver': 'ssh',
'location': {
@@ -178,7 +210,7 @@ with iotests.FilePath('t.img') as disk_path, \
'host-key-check': {
'mode': 'hash',
'type': 'sha1',
- 'hash': sha1_key,
+ 'hash': sha1_keys[matching_key],
}
},
'size': 4194304 })
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
index ec9823793a..1239d9d648 100644
--- a/tests/qemu-iotests/207.out
+++ b/tests/qemu-iotests/207.out
@@ -68,7 +68,7 @@ virtual size: 4 MiB (4194304 bytes)
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
{"return": {}}
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}