diff options
45 files changed, 3846 insertions, 490 deletions
diff --git a/.travis.yml b/.travis.yml index aeb9b211cd..279658b116 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,7 +31,7 @@ addons: - libseccomp-dev - libspice-protocol-dev - libspice-server-dev - - libssh2-1-dev + - libssh-dev - liburcu-dev - libusb-1.0-0-dev - libvte-2.91-dev @@ -270,7 +270,7 @@ matrix: - libseccomp-dev - libspice-protocol-dev - libspice-server-dev - - libssh2-1-dev + - libssh-dev - liburcu-dev - libusb-1.0-0-dev - libvte-2.91-dev diff --git a/block/Makefile.objs b/block/Makefile.objs index dbd1522722..35f3bca4d9 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -31,7 +31,7 @@ block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_VXHS) += vxhs.o -block-obj-$(CONFIG_LIBSSH2) += ssh.o +block-obj-$(CONFIG_LIBSSH) += ssh.o block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o block-obj-y += backup.o @@ -52,8 +52,8 @@ rbd.o-libs := $(RBD_LIBS) gluster.o-cflags := $(GLUSTERFS_CFLAGS) gluster.o-libs := $(GLUSTERFS_LIBS) vxhs.o-libs := $(VXHS_LIBS) -ssh.o-cflags := $(LIBSSH2_CFLAGS) -ssh.o-libs := $(LIBSSH2_LIBS) +ssh.o-cflags := $(LIBSSH_CFLAGS) +ssh.o-libs := $(LIBSSH_LIBS) block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y) dmg-bz2.o-libs := $(BZIP2_LIBS) diff --git a/block/ssh.c b/block/ssh.c index 6da7b9cbfe..501933b855 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -24,8 +24,8 @@ #include "qemu/osdep.h" -#include <libssh2.h> -#include <libssh2_sftp.h> +#include <libssh/libssh.h> +#include <libssh/sftp.h> #include "block/block_int.h" #include "block/qdict.h" @@ -46,13 +46,11 @@ #include "trace.h" /* - * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note - * that this requires that libssh2 was specially compiled with the - * `./configure --enable-debug' option, so most likely you will have - * to compile it yourself. The meaning of <bitmask> is described - * here: http://www.libssh2.org/libssh2_trace.html + * TRACE_LIBSSH=<level> enables tracing in libssh itself. + * The meaning of <level> is described here: + * http://api.libssh.org/master/group__libssh__log.html */ -#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */ +#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */ typedef struct BDRVSSHState { /* Coroutine. */ @@ -60,18 +58,15 @@ typedef struct BDRVSSHState { /* SSH connection. */ int sock; /* socket */ - LIBSSH2_SESSION *session; /* ssh session */ - LIBSSH2_SFTP *sftp; /* sftp session */ - LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */ + ssh_session session; /* ssh session */ + sftp_session sftp; /* sftp session */ + sftp_file sftp_handle; /* sftp remote file handle */ - /* See ssh_seek() function below. */ - int64_t offset; - bool offset_op_read; - - /* File attributes at open. We try to keep the .filesize field + /* + * File attributes at open. We try to keep the .size field * updated if it changes (eg by writing at the end of the file). */ - LIBSSH2_SFTP_ATTRIBUTES attrs; + sftp_attributes attrs; InetSocketAddress *inet; @@ -91,7 +86,6 @@ static void ssh_state_init(BDRVSSHState *s) { memset(s, 0, sizeof *s); s->sock = -1; - s->offset = -1; qemu_co_mutex_init(&s->lock); } @@ -99,20 +93,18 @@ static void ssh_state_free(BDRVSSHState *s) { g_free(s->user); + if (s->attrs) { + sftp_attributes_free(s->attrs); + } if (s->sftp_handle) { - libssh2_sftp_close(s->sftp_handle); + sftp_close(s->sftp_handle); } if (s->sftp) { - libssh2_sftp_shutdown(s->sftp); + sftp_free(s->sftp); } if (s->session) { - libssh2_session_disconnect(s->session, - "from qemu ssh client: " - "user closed the connection"); - libssh2_session_free(s->session); - } - if (s->sock >= 0) { - close(s->sock); + ssh_disconnect(s->session); + ssh_free(s->session); /* This frees s->sock */ } } @@ -127,13 +119,13 @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...) va_end(args); if (s->session) { - char *ssh_err; + const char *ssh_err; int ssh_err_code; - /* This is not an errno. See <libssh2.h>. */ - ssh_err_code = libssh2_session_last_error(s->session, - &ssh_err, NULL, 0); - error_setg(errp, "%s: %s (libssh2 error code: %d)", + /* This is not an errno. See <libssh/libssh.h>. */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + error_setg(errp, "%s: %s (libssh error code: %d)", msg, ssh_err, ssh_err_code); } else { error_setg(errp, "%s", msg); @@ -152,18 +144,18 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...) va_end(args); if (s->sftp) { - char *ssh_err; + const char *ssh_err; int ssh_err_code; - unsigned long sftp_err_code; + int sftp_err_code; - /* This is not an errno. See <libssh2.h>. */ - ssh_err_code = libssh2_session_last_error(s->session, - &ssh_err, NULL, 0); - /* See <libssh2_sftp.h>. */ - sftp_err_code = libssh2_sftp_last_error((s)->sftp); + /* This is not an errno. See <libssh/libssh.h>. */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + /* See <libssh/sftp.h>. */ + sftp_err_code = sftp_get_error(s->sftp); error_setg(errp, - "%s: %s (libssh2 error code: %d, sftp error code: %lu)", + "%s: %s (libssh error code: %d, sftp error code: %d)", msg, ssh_err, ssh_err_code, sftp_err_code); } else { error_setg(errp, "%s", msg); @@ -173,15 +165,15 @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...) static void sftp_error_trace(BDRVSSHState *s, const char *op) { - char *ssh_err; + const char *ssh_err; int ssh_err_code; - unsigned long sftp_err_code; + int sftp_err_code; - /* This is not an errno. See <libssh2.h>. */ - ssh_err_code = libssh2_session_last_error(s->session, - &ssh_err, NULL, 0); - /* See <libssh2_sftp.h>. */ - sftp_err_code = libssh2_sftp_last_error((s)->sftp); + /* This is not an errno. See <libssh/libssh.h>. */ + ssh_err = ssh_get_error(s->session); + ssh_err_code = ssh_get_error_code(s->session); + /* See <libssh/sftp.h>. */ + sftp_err_code = sftp_get_error(s->sftp); trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code); } @@ -282,82 +274,120 @@ static void ssh_parse_filename(const char *filename, QDict *options, parse_uri(filename, options, errp); } -static int check_host_key_knownhosts(BDRVSSHState *s, - const char *host, int port, Error **errp) +static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp) { - const char *home; - char *knh_file = NULL; - LIBSSH2_KNOWNHOSTS *knh = NULL; - struct libssh2_knownhost *found; - int ret, r; - const char *hostkey; - size_t len; - int type; - - hostkey = libssh2_session_hostkey(s->session, &len, &type); - if (!hostkey) { + int ret; +#ifdef HAVE_LIBSSH_0_8 + enum ssh_known_hosts_e state; + int r; + ssh_key pubkey; + enum ssh_keytypes_e pubkey_type; + unsigned char *server_hash = NULL; + size_t server_hash_len; + char *fingerprint = NULL; + + state = ssh_session_is_known_server(s->session); + trace_ssh_server_status(state); + + switch (state) { + case SSH_KNOWN_HOSTS_OK: + /* OK */ + trace_ssh_check_host_key_knownhosts(); + break; + case SSH_KNOWN_HOSTS_CHANGED: ret = -EINVAL; - session_error_setg(errp, s, "failed to read remote host key"); + r = ssh_get_server_publickey(s->session, &pubkey); + if (r == 0) { + r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256, + &server_hash, &server_hash_len); + pubkey_type = ssh_key_type(pubkey); + ssh_key_free(pubkey); + } + if (r == 0) { + fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256, + server_hash, + server_hash_len); + ssh_clean_pubkey_hash(&server_hash); + } + if (fingerprint) { + error_setg(errp, + "host key (%s key with fingerprint %s) does not match " + "the one in known_hosts; this may be a possible attack", + ssh_key_type_to_char(pubkey_type), fingerprint); + ssh_string_free_char(fingerprint); + } else { + error_setg(errp, + "host key does not match the one in known_hosts; this " + "may be a possible attack"); + } goto out; - } - - knh = libssh2_knownhost_init(s->session); - if (!knh) { + case SSH_KNOWN_HOSTS_OTHER: ret = -EINVAL; - session_error_setg(errp, s, - "failed to initialize known hosts support"); + error_setg(errp, + "host key for this server not found, another type exists"); + goto out; + case SSH_KNOWN_HOSTS_UNKNOWN: + ret = -EINVAL; + error_setg(errp, "no host key was found in known_hosts"); + goto out; + case SSH_KNOWN_HOSTS_NOT_FOUND: + ret = -ENOENT; + error_setg(errp, "known_hosts file not found"); + goto out; + case SSH_KNOWN_HOSTS_ERROR: + ret = -EINVAL; + error_setg(errp, "error while checking the host"); + goto out; + default: + ret = -EINVAL; + error_setg(errp, "error while checking for known server (%d)", state); goto out; } +#else /* !HAVE_LIBSSH_0_8 */ + int state; - home = getenv("HOME"); - if (home) { - knh_file = g_strdup_printf("%s/.ssh/known_hosts", home); - } else { - knh_file = g_strdup_printf("/root/.ssh/known_hosts"); - } - - /* Read all known hosts from OpenSSH-style known_hosts file. */ - libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH); + state = ssh_is_server_known(s->session); + trace_ssh_server_status(state); - r = libssh2_knownhost_checkp(knh, host, port, hostkey, len, - LIBSSH2_KNOWNHOST_TYPE_PLAIN| - LIBSSH2_KNOWNHOST_KEYENC_RAW, - &found); - switch (r) { - case LIBSSH2_KNOWNHOST_CHECK_MATCH: + switch (state) { + case SSH_SERVER_KNOWN_OK: /* OK */ - trace_ssh_check_host_key_knownhosts(found->key); + trace_ssh_check_host_key_knownhosts(); break; - case LIBSSH2_KNOWNHOST_CHECK_MISMATCH: + case SSH_SERVER_KNOWN_CHANGED: ret = -EINVAL; - session_error_setg(errp, s, - "host key does not match the one in known_hosts" - " (found key %s)", found->key); + error_setg(errp, + "host key does not match the one in known_hosts; this " + "may be a possible attack"); goto out; - case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND: + case SSH_SERVER_FOUND_OTHER: ret = -EINVAL; - session_error_setg(errp, s, "no host key was found in known_hosts"); + error_setg(errp, + "host key for this server not found, another type exists"); + goto out; + case SSH_SERVER_FILE_NOT_FOUND: + ret = -ENOENT; + error_setg(errp, "known_hosts file not found"); goto out; - case LIBSSH2_KNOWNHOST_CHECK_FAILURE: + case SSH_SERVER_NOT_KNOWN: ret = -EINVAL; - session_error_setg(errp, s, - "failure matching the host key with known_hosts"); + error_setg(errp, "no host key was found in known_hosts"); + goto out; + case SSH_SERVER_ERROR: + ret = -EINVAL; + error_setg(errp, "server error"); goto out; default: ret = -EINVAL; - session_error_setg(errp, s, "unknown error matching the host key" - " with known_hosts (%d)", r); + error_setg(errp, "error while checking for known server (%d)", state); goto out; } +#endif /* !HAVE_LIBSSH_0_8 */ /* known_hosts checking successful. */ ret = 0; out: - if (knh != NULL) { - libssh2_knownhost_free(knh); - } - g_free(knh_file); return ret; } @@ -401,18 +431,34 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len, static int check_host_key_hash(BDRVSSHState *s, const char *hash, - int hash_type, size_t fingerprint_len, Error **errp) + enum ssh_publickey_hash_type type, Error **errp) { - const char *fingerprint; - - fingerprint = libssh2_hostkey_hash(s->session, hash_type); - if (!fingerprint) { + int r; + ssh_key pubkey; + unsigned char *server_hash; + size_t server_hash_len; + +#ifdef HAVE_LIBSSH_0_8 + r = ssh_get_server_publickey(s->session, &pubkey); +#else + r = ssh_get_publickey(s->session, &pubkey); +#endif + if (r != SSH_OK) { session_error_setg(errp, s, "failed to read remote host key"); return -EINVAL; } - if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len, - hash) != 0) { + r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len); + ssh_key_free(pubkey); + if (r != 0) { + session_error_setg(errp, s, + "failed reading the hash of the server SSH key"); + return -EINVAL; + } + + r = compare_fingerprint(server_hash, server_hash_len, hash); + ssh_clean_pubkey_hash(&server_hash); + if (r != 0) { error_setg(errp, "remote host key does not match host_key_check '%s'", hash); return -EPERM; @@ -421,8 +467,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash, return 0; } -static int check_host_key(BDRVSSHState *s, const char *host, int port, - SshHostKeyCheck *hkc, Error **errp) +static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp) { SshHostKeyCheckMode mode; @@ -438,15 +483,15 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port, case SSH_HOST_KEY_CHECK_MODE_HASH: if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) { return check_host_key_hash(s, hkc->u.hash.hash, - LIBSSH2_HOSTKEY_HASH_MD5, 16, errp); + SSH_PUBLICKEY_HASH_MD5, errp); } else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) { return check_host_key_hash(s, hkc->u.hash.hash, - LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp); + SSH_PUBLICKEY_HASH_SHA1, errp); } g_assert_not_reached(); break; case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS: - return check_host_key_knownhosts(s, host, port, errp); + return check_host_key_knownhosts(s, errp); default: g_assert_not_reached(); } @@ -454,60 +499,43 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port, return -EINVAL; } -static int authenticate(BDRVSSHState *s, const char *user, Error **errp) +static int authenticate(BDRVSSHState *s, Error **errp) { int r, ret; - const char *userauthlist; - LIBSSH2_AGENT *agent = NULL; - struct libssh2_agent_publickey *identity; - struct libssh2_agent_publickey *prev_identity = NULL; + int method; - userauthlist = libssh2_userauth_list(s->session, user, strlen(user)); - if (strstr(userauthlist, "publickey") == NULL) { + /* Try to authenticate with the "none" method. */ + r = ssh_userauth_none(s->session, NULL); + if (r == SSH_AUTH_ERROR) { ret = -EPERM; - error_setg(errp, - "remote server does not support \"publickey\" authentication"); + session_error_setg(errp, s, "failed to authenticate using none " + "authentication"); goto out; - } - - /* Connect to ssh-agent and try each identity in turn. */ - agent = libssh2_agent_init(s->session); - if (!agent) { - ret = -EINVAL; - session_error_setg(errp, s, "failed to initialize ssh-agent support"); - goto out; - } - if (libssh2_agent_connect(agent)) { - ret = -ECONNREFUSED; - session_error_setg(errp, s, "failed to connect to ssh-agent"); - goto out; - } - if (libssh2_agent_list_identities(agent)) { - ret = -EINVAL; - session_error_setg(errp, s, - "failed requesting identities from ssh-agent"); + } else if (r == SSH_AUTH_SUCCESS) { + /* Authenticated! */ + ret = 0; goto out; } - for(;;) { - r = libssh2_agent_get_identity(agent, &identity, prev_identity); - if (r == 1) { /* end of list */ - break; - } - if (r < 0) { + method = ssh_userauth_list(s->session, NULL); + trace_ssh_auth_methods(method); + + /* + * Try to authenticate with publickey, using the ssh-agent + * if available. + */ + if (method & SSH_AUTH_METHOD_PUBLICKEY) { + r = ssh_userauth_publickey_auto(s->session, NULL, NULL); + if (r == SSH_AUTH_ERROR) { ret = -EINVAL; - session_error_setg(errp, s, - "failed to obtain identity from ssh-agent"); + session_error_setg(errp, s, "failed to authenticate using " + "publickey authentication"); goto out; - } - r = libssh2_agent_userauth(agent, user, identity); - if (r == 0) { + } else if (r == SSH_AUTH_SUCCESS) { /* Authenticated! */ ret = 0; goto out; } - /* Failed to authenticate with this identity, try the next one. */ - prev_identity = identity; } ret = -EPERM; @@ -515,13 +543,6 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp) "and the identities held by your ssh-agent"); out: - if (agent != NULL) { - /* Note: libssh2 implementation implicitly calls - * libssh2_agent_disconnect if necessary. - */ - libssh2_agent_free(agent); - } - return ret; } @@ -640,7 +661,8 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts, int ssh_flags, int creat_mode, Error **errp) { int r, ret; - long port = 0; + unsigned int port = 0; + int new_sock = -1; if (opts->has_user) { s->user = g_strdup(opts->user); @@ -657,71 +679,147 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts, s->inet = opts->server; opts->server = NULL; - if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) { + if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) { error_setg(errp, "Use only numeric port value"); ret = -EINVAL; goto err; } /* Open the socket and connect. */ - s->sock = inet_connect_saddr(s->inet, errp); - if (s->sock < 0) { + new_sock = inet_connect_saddr(s->inet, errp); + if (new_sock < 0) { ret = -EIO; goto err; } + /* + * Try to disable the Nagle algorithm on TCP sockets to reduce latency, + * but do not fail if it cannot be disabled. + */ + r = socket_set_nodelay(new_sock); + if (r < 0) { + warn_report("can't set TCP_NODELAY for the ssh server %s: %s", + s->inet->host, strerror(errno)); + } + /* Create SSH session. */ - s->session = libssh2_session_init(); + s->session = ssh_new(); if (!s->session) { ret = -EINVAL; - session_error_setg(errp, s, "failed to initialize libssh2 session"); + session_error_setg(errp, s, "failed to initialize libssh session"); goto err; } -#if TRACE_LIBSSH2 != 0 - libssh2_trace(s->session, TRACE_LIBSSH2); -#endif + /* + * Make sure we are in blocking mode during the connection and + * authentication phases. + */ + ssh_set_blocking(s->session, 1); - r = libssh2_session_handshake(s->session, s->sock); - if (r != 0) { + r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the user in the libssh session"); + goto err; + } + + r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the host in the libssh session"); + goto err; + } + + if (port > 0) { + r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the port in the libssh session"); + goto err; + } + } + + r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none"); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to disable the compression in the libssh " + "session"); + goto err; + } + + /* Read ~/.ssh/config. */ + r = ssh_options_parse_config(s->session, NULL); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, "failed to parse ~/.ssh/config"); + goto err; + } + + r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock); + if (r < 0) { + ret = -EINVAL; + session_error_setg(errp, s, + "failed to set the socket in the libssh session"); + goto err; + } + /* libssh took ownership of the socket. */ + s->sock = new_sock; + new_sock = -1; + + /* Connect. */ + r = ssh_connect(s->session); + if (r != SSH_OK) { ret = -EINVAL; session_error_setg(errp, s, "failed to establish SSH session"); goto err; } /* Check the remote host's key against known_hosts. */ - ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp); + ret = check_host_key(s, opts->host_key_check, errp); if (ret < 0) { goto err; } /* Authenticate. */ - ret = authenticate(s, s->user, errp); + ret = authenticate(s, errp); if (ret < 0) { goto err; } /* Start SFTP. */ - s->sftp = libssh2_sftp_init(s->session); + s->sftp = sftp_new(s->session); if (!s->sftp) { - session_error_setg(errp, s, "failed to initialize sftp handle"); + session_error_setg(errp, s, "failed to create sftp handle"); + ret = -EINVAL; + goto err; + } + + r = sftp_init(s->sftp); + if (r < 0) { + sftp_error_setg(errp, s, "failed to initialize sftp handle"); ret = -EINVAL; goto err; } /* Open the remote file. */ trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode); - s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags, - creat_mode); + s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode); if (!s->sftp_handle) { - session_error_setg(errp, s, "failed to open remote file '%s'", - opts->path); + sftp_error_setg(errp, s, "failed to open remote file '%s'", + opts->path); ret = -EINVAL; goto err; } - r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs); - if (r < 0) { + /* Make sure the SFTP file is handled in blocking mode. */ + sftp_file_set_blocking(s->sftp_handle); + + s->attrs = sftp_fstat(s->sftp_handle); + if (!s->attrs) { sftp_error_setg(errp, s, "failed to read file attributes"); return -EINVAL; } @@ -729,21 +827,27 @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts, return 0; err: + if (s->attrs) { + sftp_attributes_free(s->attrs); + } + s->attrs = NULL; if (s->sftp_handle) { - libssh2_sftp_close(s->sftp_handle); + sftp_close(s->sftp_handle); } s->sftp_handle = NULL; if (s->sftp) { - libssh2_sftp_shutdown(s->sftp); + sftp_free(s->sftp); } s->sftp = NULL; if (s->session) { - libssh2_session_disconnect(s->session, - "from qemu ssh client: " - "error opening connection"); - libssh2_session_free(s->session); + ssh_disconnect(s->session); + ssh_free(s->session); } s->session = NULL; + s->sock = -1; + if (new_sock >= 0) { + close(new_sock); + } return ret; } @@ -758,9 +862,11 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, ssh_state_init(s); - ssh_flags = LIBSSH2_FXF_READ; + ssh_flags = 0; if (bdrv_flags & BDRV_O_RDWR) { - ssh_flags |= LIBSSH2_FXF_WRITE; + ssh_flags |= O_RDWR; + } else { + ssh_flags |= O_RDONLY; } opts = ssh_parse_options(options, errp); @@ -775,18 +881,13 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags, } /* Go non-blocking. */ - libssh2_session_set_blocking(s->session, 0); + ssh_set_blocking(s->session, 0); qapi_free_BlockdevOptionsSsh(opts); return 0; err: - if (s->sock >= 0) { - close(s->sock); - } - s->sock = -1; - qapi_free_BlockdevOptionsSsh(opts); return ret; @@ -797,25 +898,25 @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp) { ssize_t ret; char c[1] = { '\0' }; - int was_blocking = libssh2_session_get_blocking(s->session); + int was_blocking = ssh_is_blocking(s->session); /* offset must be strictly greater than the current size so we do * not overwrite anything */ - assert(offset > 0 && offset > s->attrs.filesize); + assert(offset > 0 && offset > s->attrs->size); - libssh2_session_set_blocking(s->session, 1); + ssh_set_blocking(s->session, 1); - libssh2_sftp_seek64(s->sftp_handle, offset - 1); - ret = libssh2_sftp_write(s->sftp_handle, c, 1); + sftp_seek64(s->sftp_handle, offset - 1); + ret = sftp_write(s->sftp_handle, c, 1); - libssh2_session_set_blocking(s->session, was_blocking); + ssh_set_blocking(s->session, was_blocking); if (ret < 0) { sftp_error_setg(errp, s, "Failed to grow file"); return -EIO; } - s->attrs.filesize = offset; + s->attrs->size = offset; return 0; } @@ -843,8 +944,7 @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp) ssh_state_init(&s); ret = connect_to_ssh(&s, opts->location, - LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE| - LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC, + O_RDWR | O_CREAT | O_TRUNC, 0644, errp); if (ret < 0) { goto fail; @@ -913,10 +1013,8 @@ static int ssh_has_zero_init(BlockDriverState *bs) /* Assume false, unless we can positively prove it's true. */ int has_zero_init = 0; - if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) { - if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) { - has_zero_init = 1; - } + if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) { + has_zero_init = 1; } return has_zero_init; @@ -953,12 +1051,12 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) .co = qemu_coroutine_self() }; - r = libssh2_session_block_directions(s->session); + r = ssh_get_poll_flags(s->session); - if (r & LIBSSH2_SESSION_BLOCK_INBOUND) { + if (r & SSH_READ_PENDING) { rd_handler = restart_coroutine; } - if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) { + if (r & SSH_WRITE_PENDING) { wr_handler = restart_coroutine; } @@ -970,33 +1068,6 @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs) trace_ssh_co_yield_back(s->sock); } -/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position - * in the remote file. Notice that it just updates a field in the - * sftp_handle structure, so there is no network traffic and it cannot - * fail. - * - * However, `libssh2_sftp_seek64' does have a catastrophic effect on - * performance since it causes the handle to throw away all in-flight - * reads and buffered readahead data. Therefore this function tries - * to be intelligent about when to call the underlying libssh2 function. - */ -#define SSH_SEEK_WRITE 0 -#define SSH_SEEK_READ 1 -#define SSH_SEEK_FORCE 2 - -static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags) -{ - bool op_read = (flags & SSH_SEEK_READ) != 0; - bool force = (flags & SSH_SEEK_FORCE) != 0; - - if (force || op_read != s->offset_op_read || offset != s->offset) { - trace_ssh_seek(offset); - libssh2_sftp_seek64(s->sftp_handle, offset); - s->offset = offset; - s->offset_op_read = op_read; - } -} - static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs, int64_t offset, size_t size, QEMUIOVector *qiov) @@ -1008,7 +1079,8 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs, trace_ssh_read(offset, size); - ssh_seek(s, offset, SSH_SEEK_READ); + trace_ssh_seek(offset); + sftp_seek64(s->sftp_handle, offset); /* This keeps track of the current iovec element ('i'), where we * will write to next ('buf'), and the end of the current iovec @@ -1018,35 +1090,35 @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs, buf = i->iov_base; end_of_vec = i->iov_base + i->iov_len; - /* libssh2 has a hard-coded limit of 2000 bytes per request, - * although it will also do readahead behind our backs. Therefore - * we may have to do repeated reads here until we have read 'size' - * bytes. - */ for (got = 0; got < size; ) { + size_t request_read_size; again: - trace_ssh_read_buf(buf, end_of_vec - buf); - r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf); - trace_ssh_read_return(r); + /* + * The size of SFTP packets is limited to 32K bytes, so limit + * the amount of data requested to 16K, as libssh currently + * does not handle multiple requests on its own. + */ + request_read_size = MIN(end_of_vec - buf, 16384); + trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size); + r = sftp_read(s->sftp_handle, buf, request_read_size); + trace_ssh_read_return(r, sftp_get_error(s->sftp)); - if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { + if (r == SSH_AGAIN) { co_yield(s, bs); goto again; } - if (r < 0) { - sftp_error_trace(s, "read"); - s->offset = -1; - return -EIO; - } - if (r == 0) { + if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) { /* EOF: Short read so pad the buffer with zeroes and return it. */ qemu_iovec_memset(qiov, got, 0, size - got); return 0; } + if (r <= 0) { + sftp_error_trace(s, "read"); + return -EIO; + } got += r; buf += r; - s->offset += r; if (buf >= end_of_vec && got < size) { i++; buf = i->iov_base; @@ -1083,7 +1155,8 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs, trace_ssh_write(offset, size); - ssh_seek(s, offset, SSH_SEEK_WRITE); + trace_ssh_seek(offset); + sftp_seek64(s->sftp_handle, offset); /* This keeps track of the current iovec element ('i'), where we * will read from next ('buf'), and the end of the current iovec @@ -1094,46 +1167,37 @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs, end_of_vec = i->iov_base + i->iov_len; for (written = 0; written < size; ) { + size_t request_write_size; again: - trace_ssh_write_buf(buf, end_of_vec - buf); - r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf); - trace_ssh_write_return(r); + /* + * Avoid too large data packets, as libssh currently does not + * handle multiple requests on its own. + */ + request_write_size = MIN(end_of_vec - buf, 131072); + trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size); + r = sftp_write(s->sftp_handle, buf, request_write_size); + trace_ssh_write_return(r, sftp_get_error(s->sftp)); - if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { + if (r == SSH_AGAIN) { co_yield(s, bs); goto again; } if (r < 0) { sftp_error_trace(s, "write"); - s->offset = -1; return -EIO; } - /* The libssh2 API is very unclear about this. A comment in - * the code says "nothing was acked, and no EAGAIN was - * received!" which apparently means that no data got sent - * out, and the underlying channel didn't return any EAGAIN - * indication. I think this is a bug in either libssh2 or - * OpenSSH (server-side). In any case, forcing a seek (to - * discard libssh2 internal buffers), and then trying again - * works for me. - */ - if (r == 0) { - ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE); - co_yield(s, bs); - goto again; - } written += r; buf += r; - s->offset += r; if (buf >= end_of_vec && written < size) { i++; buf = i->iov_base; end_of_vec = i->iov_base + i->iov_len; } - if (offset + written > s->attrs.filesize) - s->attrs.filesize = offset + written; + if (offset + written > s->attrs->size) { + s->attrs->size = offset + written; + } } return 0; @@ -1168,24 +1232,24 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) } } -#ifdef HAS_LIBSSH2_SFTP_FSYNC +#ifdef HAVE_LIBSSH_0_8 static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs) { int r; trace_ssh_flush(); + + if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) { + unsafe_flush_warning(s, "OpenSSH >= 6.3"); + return 0; + } again: - r = libssh2_sftp_fsync(s->sftp_handle); - if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) { + r = sftp_fsync(s->sftp_handle); + if (r == SSH_AGAIN) { co_yield(s, bs); goto again; } - if (r == LIBSSH2_ERROR_SFTP_PROTOCOL && - libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) { - unsafe_flush_warning(s, "OpenSSH >= 6.3"); - return 0; - } if (r < 0) { sftp_error_trace(s, "fsync"); return -EIO; @@ -1206,25 +1270,25 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs) return ret; } -#else /* !HAS_LIBSSH2_SFTP_FSYNC */ +#else /* !HAVE_LIBSSH_0_8 */ static coroutine_fn int ssh_co_flush(BlockDriverState *bs) { BDRVSSHState *s = bs->opaque; - unsafe_flush_warning(s, "libssh2 >= 1.4.4"); + unsafe_flush_warning(s, "libssh >= 0.8.0"); return 0; } -#endif /* !HAS_LIBSSH2_SFTP_FSYNC */ +#endif /* !HAVE_LIBSSH_0_8 */ static int64_t ssh_getlength(BlockDriverState *bs) { BDRVSSHState *s = bs->opaque; int64_t length; - /* Note we cannot make a libssh2 call here. */ - length = (int64_t) s->attrs.filesize; + /* Note we cannot make a libssh call here. */ + length = (int64_t) s->attrs->size; trace_ssh_getlength(length); return length; @@ -1241,12 +1305,12 @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset, return -ENOTSUP; } - if (offset < s->attrs.filesize) { + if (offset < s->attrs->size) { error_setg(errp, "ssh driver does not support shrinking files"); return -ENOTSUP; } - if (offset == s->attrs.filesize) { + if (offset == s->attrs->size) { return 0; } @@ -1341,12 +1405,16 @@ static void bdrv_ssh_init(void) { int r; - r = libssh2_init(0); + r = ssh_init(); if (r != 0) { - fprintf(stderr, "libssh2 initialization failed, %d\n", r); + fprintf(stderr, "libssh initialization failed, %d\n", r); exit(EXIT_FAILURE); } +#if TRACE_LIBSSH != 0 + ssh_set_log_level(TRACE_LIBSSH); +#endif + bdrv_register(&bdrv_ssh); } diff --git a/block/trace-events b/block/trace-events index 9ccea755da..d724df0117 100644 --- a/block/trace-events +++ b/block/trace-events @@ -171,19 +171,21 @@ nbd_client_connect_success(const char *export_name) "export '%s'" # ssh.c ssh_restart_coroutine(void *co) "co=%p" ssh_flush(void) "fsync" -ssh_check_host_key_knownhosts(const char *key) "host key OK: %s" +ssh_check_host_key_knownhosts(void) "host key OK" ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o" ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p" ssh_co_yield_back(int sock) "s->sock=%d - back" ssh_getlength(int64_t length) "length=%" PRIi64 ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64 ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu" -ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu" -ssh_read_return(ssize_t ret) "sftp_read returned %zd" +ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)" +ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)" ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu" -ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu" -ssh_write_return(ssize_t ret) "sftp_write returned %zd" +ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)" +ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)" ssh_seek(int64_t offset) "seeking to offset=%" PRIi64 +ssh_auth_methods(int methods) "auth methods=0x%x" +ssh_server_status(int status) "server status=%d" # curl.c curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld" @@ -216,4 +218,4 @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s" sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32 # ssh.c -sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)" +sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)" diff --git a/block/vmdk.c b/block/vmdk.c index 51067c774f..bd36ece125 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -91,6 +91,44 @@ typedef struct { uint16_t compressAlgorithm; } QEMU_PACKED VMDK4Header; +typedef struct VMDKSESparseConstHeader { + uint64_t magic; + uint64_t version; + uint64_t capacity; + uint64_t grain_size; + uint64_t grain_table_size; + uint64_t flags; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t volatile_header_offset; + uint64_t volatile_header_size; + uint64_t journal_header_offset; + uint64_t journal_header_size; + uint64_t journal_offset; + uint64_t journal_size; + uint64_t grain_dir_offset; + uint64_t grain_dir_size; + uint64_t grain_tables_offset; + uint64_t grain_tables_size; + uint64_t free_bitmap_offset; + uint64_t free_bitmap_size; + uint64_t backmap_offset; + uint64_t backmap_size; + uint64_t grains_offset; + uint64_t grains_size; + uint8_t pad[304]; +} QEMU_PACKED VMDKSESparseConstHeader; + +typedef struct VMDKSESparseVolatileHeader { + uint64_t magic; + uint64_t free_gt_number; + uint64_t next_txn_seq_number; + uint64_t replay_journal; + uint8_t pad[480]; +} QEMU_PACKED VMDKSESparseVolatileHeader; + #define L2_CACHE_SIZE 16 typedef struct VmdkExtent { @@ -99,19 +137,23 @@ typedef struct VmdkExtent { bool compressed; bool has_marker; bool has_zero_grain; + bool sesparse; + uint64_t sesparse_l2_tables_offset; + uint64_t sesparse_clusters_offset; + int32_t entry_size; int version; int64_t sectors; int64_t end_sector; int64_t flat_start_offset; int64_t l1_table_offset; int64_t l1_backup_table_offset; - uint32_t *l1_table; + void *l1_table; uint32_t *l1_backup_table; unsigned int l1_size; uint32_t l1_entry_sectors; unsigned int l2_size; - uint32_t *l2_cache; + void *l2_cache; uint32_t l2_cache_offsets[L2_CACHE_SIZE]; uint32_t l2_cache_counts[L2_CACHE_SIZE]; @@ -425,11 +467,22 @@ static int vmdk_add_extent(BlockDriverState *bs, error_setg(errp, "Invalid granularity, image may be corrupt"); return -EFBIG; } - if (l1_size > 512 * 1024 * 1024) { - /* Although with big capacity and small l1_entry_sectors, we can get a + if (l1_size > 32 * 1024 * 1024) { + /* + * Although with big capacity and small l1_entry_sectors, we can get a * big l1_size, we don't want unbounded value to allocate the table. - * Limit it to 512M, which is 16PB for default cluster and L2 table - * size */ + * Limit it to 32M, which is enough to store: + * 8TB - for both VMDK3 & VMDK4 with + * minimal cluster size: 512B + * minimal L2 table size: 512 entries + * 8 TB is still more than the maximal value supported for + * VMDK3 & VMDK4 which is 2TB. + * 64TB - for "ESXi seSparse Extent" + * minimal cluster size: 512B (default is 4KB) + * L2 table size: 4096 entries (const). + * 64TB is more than the maximal value supported for + * seSparse VMDKs (which is slightly less than 64TB) + */ error_setg(errp, "L1 size too big"); return -EFBIG; } @@ -454,6 +507,7 @@ static int vmdk_add_extent(BlockDriverState *bs, extent->l2_size = l2_size; extent->cluster_sectors = flat ? sectors : cluster_sectors; extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors); + extent->entry_size = sizeof(uint32_t); if (s->num_extents > 1) { extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; @@ -475,7 +529,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, int i; /* read the L1 table */ - l1_size = extent->l1_size * sizeof(uint32_t); + l1_size = extent->l1_size * extent->entry_size; extent->l1_table = g_try_malloc(l1_size); if (l1_size && extent->l1_table == NULL) { return -ENOMEM; @@ -493,10 +547,16 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, goto fail_l1; } for (i = 0; i < extent->l1_size; i++) { - le32_to_cpus(&extent->l1_table[i]); + if (extent->entry_size == sizeof(uint64_t)) { + le64_to_cpus((uint64_t *)extent->l1_table + i); + } else { + assert(extent->entry_size == sizeof(uint32_t)); + le32_to_cpus((uint32_t *)extent->l1_table + i); + } } if (extent->l1_backup_table_offset) { + assert(!extent->sesparse); extent->l1_backup_table = g_try_malloc(l1_size); if (l1_size && extent->l1_backup_table == NULL) { ret = -ENOMEM; @@ -519,7 +579,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, } extent->l2_cache = - g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE); + g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE); return 0; fail_l1b: g_free(extent->l1_backup_table); @@ -565,6 +625,205 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, return ret; } +#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe) +#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe) + +/* Strict checks - format not officially documented */ +static int check_se_sparse_const_header(VMDKSESparseConstHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->version = le64_to_cpu(header->version); + header->grain_size = le64_to_cpu(header->grain_size); + header->grain_table_size = le64_to_cpu(header->grain_table_size); + header->flags = le64_to_cpu(header->flags); + header->reserved1 = le64_to_cpu(header->reserved1); + header->reserved2 = le64_to_cpu(header->reserved2); + header->reserved3 = le64_to_cpu(header->reserved3); + header->reserved4 = le64_to_cpu(header->reserved4); + + header->volatile_header_offset = + le64_to_cpu(header->volatile_header_offset); + header->volatile_header_size = le64_to_cpu(header->volatile_header_size); + + header->journal_header_offset = le64_to_cpu(header->journal_header_offset); + header->journal_header_size = le64_to_cpu(header->journal_header_size); + + header->journal_offset = le64_to_cpu(header->journal_offset); + header->journal_size = le64_to_cpu(header->journal_size); + + header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset); + header->grain_dir_size = le64_to_cpu(header->grain_dir_size); + + header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset); + header->grain_tables_size = le64_to_cpu(header->grain_tables_size); + + header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset); + header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size); + + header->backmap_offset = le64_to_cpu(header->backmap_offset); + header->backmap_size = le64_to_cpu(header->backmap_size); + + header->grains_offset = le64_to_cpu(header->grains_offset); + header->grains_size = le64_to_cpu(header->grains_size); + + if (header->magic != SESPARSE_CONST_HEADER_MAGIC) { + error_setg(errp, "Bad const header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->version != 0x0000000200000001) { + error_setg(errp, "Unsupported version: 0x%016" PRIx64, + header->version); + return -ENOTSUP; + } + + if (header->grain_size != 8) { + error_setg(errp, "Unsupported grain size: %" PRIu64, + header->grain_size); + return -ENOTSUP; + } + + if (header->grain_table_size != 64) { + error_setg(errp, "Unsupported grain table size: %" PRIu64, + header->grain_table_size); + return -ENOTSUP; + } + + if (header->flags != 0) { + error_setg(errp, "Unsupported flags: 0x%016" PRIx64, + header->flags); + return -ENOTSUP; + } + + if (header->reserved1 != 0 || header->reserved2 != 0 || + header->reserved3 != 0 || header->reserved4 != 0) { + error_setg(errp, "Unsupported reserved bits:" + " 0x%016" PRIx64 " 0x%016" PRIx64 + " 0x%016" PRIx64 " 0x%016" PRIx64, + header->reserved1, header->reserved2, + header->reserved3, header->reserved4); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero const header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header, + Error **errp) +{ + header->magic = le64_to_cpu(header->magic); + header->free_gt_number = le64_to_cpu(header->free_gt_number); + header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number); + header->replay_journal = le64_to_cpu(header->replay_journal); + + if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) { + error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64, + header->magic); + return -EINVAL; + } + + if (header->replay_journal) { + error_setg(errp, "Image is dirty, Replaying journal not supported"); + return -ENOTSUP; + } + + /* check that padding is 0 */ + if (!buffer_is_zero(header->pad, sizeof(header->pad))) { + error_setg(errp, "Unsupported non-zero volatile header padding"); + return -ENOTSUP; + } + + return 0; +} + +static int vmdk_open_se_sparse(BlockDriverState *bs, + BdrvChild *file, + int flags, Error **errp) +{ + int ret; + VMDKSESparseConstHeader const_header; + VMDKSESparseVolatileHeader volatile_header; + VmdkExtent *extent; + + ret = bdrv_apply_auto_read_only(bs, + "No write support for seSparse images available", errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(const_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, 0, &const_header, sizeof(const_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read const header from file '%s'", + file->bs->filename); + return ret; + } + + /* check const header */ + ret = check_se_sparse_const_header(&const_header, errp); + if (ret < 0) { + return ret; + } + + assert(sizeof(volatile_header) == SECTOR_SIZE); + + ret = bdrv_pread(file, + const_header.volatile_header_offset * SECTOR_SIZE, + &volatile_header, sizeof(volatile_header)); + if (ret < 0) { + bdrv_refresh_filename(file->bs); + error_setg_errno(errp, -ret, + "Could not read volatile header from file '%s'", + file->bs->filename); + return ret; + } + + /* check volatile header */ + ret = check_se_sparse_volatile_header(&volatile_header, errp); + if (ret < 0) { + return ret; + } + + ret = vmdk_add_extent(bs, file, false, + const_header.capacity, + const_header.grain_dir_offset * SECTOR_SIZE, + 0, + const_header.grain_dir_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_table_size * + SECTOR_SIZE / sizeof(uint64_t), + const_header.grain_size, + &extent, + errp); + if (ret < 0) { + return ret; + } + + extent->sesparse = true; + extent->sesparse_l2_tables_offset = const_header.grain_tables_offset; + extent->sesparse_clusters_offset = const_header.grains_offset; + extent->entry_size = sizeof(uint64_t); + + ret = vmdk_init_tables(bs, extent, errp); + if (ret) { + /* free extent allocated by vmdk_add_extent */ + vmdk_free_last_extent(bs); + } + + return ret; +} + static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, QDict *options, Error **errp); @@ -842,6 +1101,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, * RW [size in sectors] SPARSE "file-name.vmdk" * RW [size in sectors] VMFS "file-name.vmdk" * RW [size in sectors] VMFSSPARSE "file-name.vmdk" + * RW [size in sectors] SESPARSE "file-name.vmdk" */ flat_offset = -1; matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, @@ -864,7 +1124,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, if (sectors <= 0 || (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && - strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || + strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") && + strcmp(type, "SESPARSE")) || (strcmp(access, "RW"))) { continue; } @@ -917,6 +1178,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, return ret; } extent = &s->extents[s->num_extents - 1]; + } else if (!strcmp(type, "SESPARSE")) { + ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); + if (ret) { + bdrv_unref_child(bs, extent_file); + return ret; + } + extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); bdrv_unref_child(bs, extent_file); @@ -951,6 +1219,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, if (strcmp(ct, "monolithicFlat") && strcmp(ct, "vmfs") && strcmp(ct, "vmfsSparse") && + strcmp(ct, "seSparse") && strcmp(ct, "twoGbMaxExtentSparse") && strcmp(ct, "twoGbMaxExtentFlat")) { error_setg(errp, "Unsupported image type '%s'", ct); @@ -1201,10 +1470,12 @@ static int get_cluster_offset(BlockDriverState *bs, { unsigned int l1_index, l2_offset, l2_index; int min_index, i, j; - uint32_t min_count, *l2_table; + uint32_t min_count; + void *l2_table; bool zeroed = false; int64_t ret; int64_t cluster_sector; + unsigned int l2_size_bytes = extent->l2_size * extent->entry_size; if (m_data) { m_data->valid = 0; @@ -1219,7 +1490,36 @@ static int get_cluster_offset(BlockDriverState *bs, if (l1_index >= extent->l1_size) { return VMDK_ERROR; } - l2_offset = extent->l1_table[l1_index]; + if (extent->sesparse) { + uint64_t l2_offset_u64; + + assert(extent->entry_size == sizeof(uint64_t)); + + l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index]; + if (l2_offset_u64 == 0) { + l2_offset = 0; + } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) { + /* + * Top most nibble is 0x1 if grain table is allocated. + * strict check - top most 4 bytes must be 0x10000000 since max + * supported size is 64TB for disk - so no more than 64TB / 16MB + * grain directories which is smaller than uint32, + * where 16MB is the only supported default grain table coverage. + */ + return VMDK_ERROR; + } else { + l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff; + l2_offset_u64 = extent->sesparse_l2_tables_offset + + l2_offset_u64 * l2_size_bytes / SECTOR_SIZE; + if (l2_offset_u64 > 0x00000000ffffffff) { + return VMDK_ERROR; + } + l2_offset = (unsigned int)(l2_offset_u64); + } + } else { + assert(extent->entry_size == sizeof(uint32_t)); + l2_offset = ((uint32_t *)extent->l1_table)[l1_index]; + } if (!l2_offset) { return VMDK_UNALLOC; } @@ -1231,7 +1531,7 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[j] >>= 1; } } - l2_table = extent->l2_cache + (i * extent->l2_size); + l2_table = (char *)extent->l2_cache + (i * l2_size_bytes); goto found; } } @@ -1244,13 +1544,13 @@ static int get_cluster_offset(BlockDriverState *bs, min_index = i; } } - l2_table = extent->l2_cache + (min_index * extent->l2_size); + l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes); BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD); if (bdrv_pread(extent->file, (int64_t)l2_offset * 512, l2_table, - extent->l2_size * sizeof(uint32_t) - ) != extent->l2_size * sizeof(uint32_t)) { + l2_size_bytes + ) != l2_size_bytes) { return VMDK_ERROR; } @@ -1258,16 +1558,45 @@ static int get_cluster_offset(BlockDriverState *bs, extent->l2_cache_counts[min_index] = 1; found: l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size; - cluster_sector = le32_to_cpu(l2_table[l2_index]); - if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { - zeroed = true; + if (extent->sesparse) { + cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]); + switch (cluster_sector & 0xf000000000000000) { + case 0x0000000000000000: + /* unallocated grain */ + if (cluster_sector != 0) { + return VMDK_ERROR; + } + break; + case 0x1000000000000000: + /* scsi-unmapped grain - fallthrough */ + case 0x2000000000000000: + /* zero grain */ + zeroed = true; + break; + case 0x3000000000000000: + /* allocated grain */ + cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) | + ((cluster_sector & 0x0000ffffffffffff) << 12)); + cluster_sector = extent->sesparse_clusters_offset + + cluster_sector * extent->cluster_sectors; + break; + default: + return VMDK_ERROR; + } + } else { + cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]); + + if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) { + zeroed = true; + } } if (!cluster_sector || zeroed) { if (!allocate) { return zeroed ? VMDK_ZEROED : VMDK_UNALLOC; } + assert(!extent->sesparse); if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) { return VMDK_ERROR; @@ -1291,7 +1620,7 @@ static int get_cluster_offset(BlockDriverState *bs, m_data->l1_index = l1_index; m_data->l2_index = l2_index; m_data->l2_offset = l2_offset; - m_data->l2_cache_entry = &l2_table[l2_index]; + m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index; } } *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; @@ -1617,6 +1946,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, if (!extent) { return -EIO; } + if (extent->sesparse) { + return -ENOTSUP; + } offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset); n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE - offset_in_cluster); diff --git a/blockdev.c b/blockdev.c index 5d6a13dea9..4d141e9a1f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1774,7 +1774,7 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->u.drive_backup.data; - bs = qmp_get_root_bs(backup->device, errp); + bs = bdrv_lookup_bs(backup->device, backup->device, errp); if (!bs) { return; } @@ -472,7 +472,7 @@ auth_pam="" vte="" virglrenderer="" tpm="" -libssh2="" +libssh="" live_block_migration="yes" numa="" tcmalloc="no" @@ -1439,9 +1439,9 @@ for opt do ;; --enable-tpm) tpm="yes" ;; - --disable-libssh2) libssh2="no" + --disable-libssh) libssh="no" ;; - --enable-libssh2) libssh2="yes" + --enable-libssh) libssh="yes" ;; --disable-live-block-migration) live_block_migration="no" ;; @@ -1810,7 +1810,7 @@ disabled with --disable-FEATURE, default is enabled if available: coroutine-pool coroutine freelist (better performance) glusterfs GlusterFS backend tpm TPM support - libssh2 ssh block device support + libssh ssh block device support numa libnuma support libxml2 for Parallels image format tcmalloc tcmalloc support @@ -3914,43 +3914,34 @@ EOF fi ########################################## -# libssh2 probe -min_libssh2_version=1.2.8 -if test "$libssh2" != "no" ; then - if $pkg_config --atleast-version=$min_libssh2_version libssh2; then - libssh2_cflags=$($pkg_config libssh2 --cflags) - libssh2_libs=$($pkg_config libssh2 --libs) - libssh2=yes +# libssh probe +if test "$libssh" != "no" ; then + if $pkg_config --exists libssh; then + libssh_cflags=$($pkg_config libssh --cflags) + libssh_libs=$($pkg_config libssh --libs) + libssh=yes else - if test "$libssh2" = "yes" ; then - error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2" + if test "$libssh" = "yes" ; then + error_exit "libssh required for --enable-libssh" fi - libssh2=no + libssh=no fi fi ########################################## -# libssh2_sftp_fsync probe +# Check for libssh 0.8 +# This is done like this instead of using the LIBSSH_VERSION_* and +# SSH_VERSION_* macros because some distributions in the past shipped +# snapshots of the future 0.8 from Git, and those snapshots did not +# have updated version numbers (still referring to 0.7.0). -if test "$libssh2" = "yes"; then +if test "$libssh" = "yes"; then cat > $TMPC <<EOF -#include <stdio.h> -#include <libssh2.h> -#include <libssh2_sftp.h> -int main(void) { - LIBSSH2_SESSION *session; - LIBSSH2_SFTP *sftp; - LIBSSH2_SFTP_HANDLE *sftp_handle; - session = libssh2_session_init (); - sftp = libssh2_sftp_init (session); - sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0); - libssh2_sftp_fsync (sftp_handle); - return 0; -} +#include <libssh/libssh.h> +int main(void) { return ssh_get_server_publickey(NULL, NULL); } EOF - # libssh2_cflags/libssh2_libs defined in previous test. - if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then - QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS" + if compile_prog "$libssh_cflags" "$libssh_libs"; then + libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags" fi fi @@ -6451,7 +6442,7 @@ echo "GlusterFS support $glusterfs" echo "gcov $gcov_tool" echo "gcov enabled $gcov" echo "TPM support $tpm" -echo "libssh2 support $libssh2" +echo "libssh support $libssh" echo "QOM debugging $qom_cast_debug" echo "Live block migration $live_block_migration" echo "lzo support $lzo" @@ -7144,10 +7135,10 @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak fi -if test "$libssh2" = "yes" ; then - echo "CONFIG_LIBSSH2=m" >> $config_host_mak - echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak - echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak +if test "$libssh" = "yes" ; then + echo "CONFIG_LIBSSH=m" >> $config_host_mak + echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak + echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak fi if test "$live_block_migration" = "yes" ; then diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi index da06a9bc83..91ab0eceae 100644 --- a/docs/qemu-block-drivers.texi +++ b/docs/qemu-block-drivers.texi @@ -782,7 +782,7 @@ print a warning when @code{fsync} is not supported: warning: ssh server @code{ssh.example.com:22} does not support fsync -With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is +With sufficiently new versions of libssh and OpenSSH, @code{fsync} is supported. @node disk_images_nvme diff --git a/hw/9pfs/xen-9pfs.h b/hw/9pfs/xen-9pfs.h index fbdee3d843..241e2216a4 100644 --- a/hw/9pfs/xen-9pfs.h +++ b/hw/9pfs/xen-9pfs.h @@ -13,8 +13,8 @@ #ifndef HW_9PFS_XEN_9PFS_H #define HW_9PFS_XEN_9PFS_H -#include <xen/io/protocols.h> -#include "hw/xen/io/ring.h" +#include "hw/xen/interface/io/protocols.h" +#include "hw/xen/interface/io/ring.h" /* * Do not merge into xen-9p-backend.c: clang doesn't allow unused static diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index f7ad452bbd..0f200c5fb0 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -58,6 +58,7 @@ struct XenBlockDataPlane { int requests_inflight; unsigned int max_requests; BlockBackend *blk; + unsigned int sector_size; QEMUBH *bh; IOThread *iothread; AioContext *ctx; @@ -167,7 +168,7 @@ static int xen_block_parse_request(XenBlockRequest *request) goto err; } - request->start = request->req.sector_number * XEN_BLKIF_SECTOR_SIZE; + request->start = request->req.sector_number * dataplane->sector_size; for (i = 0; i < request->req.nr_segments; i++) { if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { error_report("error: nr_segments too big"); @@ -177,14 +178,14 @@ static int xen_block_parse_request(XenBlockRequest *request) error_report("error: first > last sector"); goto err; } - if (request->req.seg[i].last_sect * XEN_BLKIF_SECTOR_SIZE >= + if (request->req.seg[i].last_sect * dataplane->sector_size >= XC_PAGE_SIZE) { error_report("error: page crossing"); goto err; } len = (request->req.seg[i].last_sect - - request->req.seg[i].first_sect + 1) * XEN_BLKIF_SECTOR_SIZE; + request->req.seg[i].first_sect + 1) * dataplane->sector_size; request->size += len; } if (request->start + request->size > blk_getlength(dataplane->blk)) { @@ -218,17 +219,17 @@ static int xen_block_copy_request(XenBlockRequest *request) if (to_domain) { segs[i].dest.foreign.ref = request->req.seg[i].gref; segs[i].dest.foreign.offset = request->req.seg[i].first_sect * - XEN_BLKIF_SECTOR_SIZE; + dataplane->sector_size; segs[i].source.virt = virt; } else { segs[i].source.foreign.ref = request->req.seg[i].gref; segs[i].source.foreign.offset = request->req.seg[i].first_sect * - XEN_BLKIF_SECTOR_SIZE; + dataplane->sector_size; segs[i].dest.virt = virt; } segs[i].len = (request->req.seg[i].last_sect - request->req.seg[i].first_sect + 1) * - XEN_BLKIF_SECTOR_SIZE; + dataplane->sector_size; virt += segs[i].len; } @@ -317,7 +318,9 @@ static void xen_block_complete_aio(void *opaque, int ret) } xen_block_release_request(request); - qemu_bh_schedule(dataplane->bh); + if (dataplane->more_work) { + qemu_bh_schedule(dataplane->bh); + } done: aio_context_release(dataplane->ctx); @@ -336,12 +339,12 @@ static bool xen_block_split_discard(XenBlockRequest *request, /* Wrap around, or overflowing byte limit? */ if (sec_start + sec_count < sec_count || - sec_start + sec_count > INT64_MAX / XEN_BLKIF_SECTOR_SIZE) { + sec_start + sec_count > INT64_MAX / dataplane->sector_size) { return false; } - byte_offset = sec_start * XEN_BLKIF_SECTOR_SIZE; - byte_remaining = sec_count * XEN_BLKIF_SECTOR_SIZE; + byte_offset = sec_start * dataplane->sector_size; + byte_remaining = sec_count * dataplane->sector_size; do { byte_chunk = byte_remaining > BDRV_REQUEST_MAX_BYTES ? @@ -514,12 +517,13 @@ static int xen_block_get_request(XenBlockDataPlane *dataplane, */ #define IO_PLUG_THRESHOLD 1 -static void xen_block_handle_requests(XenBlockDataPlane *dataplane) +static bool xen_block_handle_requests(XenBlockDataPlane *dataplane) { RING_IDX rc, rp; XenBlockRequest *request; int inflight_atstart = dataplane->requests_inflight; int batched = 0; + bool done_something = false; dataplane->more_work = 0; @@ -551,6 +555,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane) } xen_block_get_request(dataplane, request, rc); dataplane->rings.common.req_cons = ++rc; + done_something = true; /* parse them */ if (xen_block_parse_request(request) != 0) { @@ -602,10 +607,7 @@ static void xen_block_handle_requests(XenBlockDataPlane *dataplane) blk_io_unplug(dataplane->blk); } - if (dataplane->more_work && - dataplane->requests_inflight < dataplane->max_requests) { - qemu_bh_schedule(dataplane->bh); - } + return done_something; } static void xen_block_dataplane_bh(void *opaque) @@ -617,21 +619,23 @@ static void xen_block_dataplane_bh(void *opaque) aio_context_release(dataplane->ctx); } -static void xen_block_dataplane_event(void *opaque) +static bool xen_block_dataplane_event(void *opaque) { XenBlockDataPlane *dataplane = opaque; - qemu_bh_schedule(dataplane->bh); + return xen_block_handle_requests(dataplane); } XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, - BlockConf *conf, + BlockBackend *blk, + unsigned int sector_size, IOThread *iothread) { XenBlockDataPlane *dataplane = g_new0(XenBlockDataPlane, 1); dataplane->xendev = xendev; - dataplane->blk = conf->blk; + dataplane->blk = blk; + dataplane->sector_size = sector_size; QLIST_INIT(&dataplane->inflight); QLIST_INIT(&dataplane->freelist); @@ -803,7 +807,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, } dataplane->event_channel = - xen_device_bind_event_channel(xendev, event_channel, + xen_device_bind_event_channel(xendev, dataplane->ctx, event_channel, xen_block_dataplane_event, dataplane, &local_err); if (local_err) { diff --git a/hw/block/dataplane/xen-block.h b/hw/block/dataplane/xen-block.h index d6fa6d26dd..76dcd51c3d 100644 --- a/hw/block/dataplane/xen-block.h +++ b/hw/block/dataplane/xen-block.h @@ -15,7 +15,8 @@ typedef struct XenBlockDataPlane XenBlockDataPlane; XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, - BlockConf *conf, + BlockBackend *blk, + unsigned int sector_size, IOThread *iothread); void xen_block_dataplane_destroy(XenBlockDataPlane *dataplane); void xen_block_dataplane_start(XenBlockDataPlane *dataplane, diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 107a719b95..36d6a8bb3a 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1384,7 +1384,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.cap = 0; NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); NVME_CAP_SET_CQR(n->bar.cap, 1); - NVME_CAP_SET_AMS(n->bar.cap, 1); NVME_CAP_SET_TO(n->bar.cap, 0xf); NVME_CAP_SET_CSS(n->bar.cap, 1); NVME_CAP_SET_MPSMAX(n->bar.cap, 4); diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 4de537aef4..8f224ef81d 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -52,11 +52,25 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) XenBlockDevice *blockdev = XEN_BLOCK_DEVICE(xendev); const char *type = object_get_typename(OBJECT(blockdev)); XenBlockVdev *vdev = &blockdev->props.vdev; + BlockConf *conf = &blockdev->props.conf; + unsigned int feature_large_sector_size; unsigned int order, nr_ring_ref, *ring_ref, event_channel, protocol; char *str; trace_xen_block_connect(type, vdev->disk, vdev->partition); + if (xen_device_frontend_scanf(xendev, "feature-large-sector-size", "%u", + &feature_large_sector_size) != 1) { + feature_large_sector_size = 0; + } + + if (feature_large_sector_size != 1 && + conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) { + error_setg(errp, "logical_block_size != %u not supported by frontend", + XEN_BLKIF_SECTOR_SIZE); + return; + } + if (xen_device_frontend_scanf(xendev, "ring-page-order", "%u", &order) != 1) { nr_ring_ref = 1; @@ -150,7 +164,7 @@ static void xen_block_set_size(XenBlockDevice *blockdev) const char *type = object_get_typename(OBJECT(blockdev)); XenBlockVdev *vdev = &blockdev->props.vdev; BlockConf *conf = &blockdev->props.conf; - int64_t sectors = blk_getlength(conf->blk) / XEN_BLKIF_SECTOR_SIZE; + int64_t sectors = blk_getlength(conf->blk) / conf->logical_block_size; XenDevice *xendev = XEN_DEVICE(blockdev); trace_xen_block_size(type, vdev->disk, vdev->partition, sectors); @@ -185,6 +199,7 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) const char *type = object_get_typename(OBJECT(blockdev)); XenBlockVdev *vdev = &blockdev->props.vdev; BlockConf *conf = &blockdev->props.conf; + BlockBackend *blk = conf->blk; Error *local_err = NULL; if (vdev->type == XEN_BLOCK_VDEV_TYPE_INVALID) { @@ -206,8 +221,8 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) * The blkif protocol does not deal with removable media, so it must * always be present, even for CDRom devices. */ - assert(conf->blk); - if (!blk_is_inserted(conf->blk)) { + assert(blk); + if (!blk_is_inserted(blk)) { error_setg(errp, "device needs media, but drive is empty"); return; } @@ -224,26 +239,20 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) blkconf_blocksizes(conf); - if (conf->logical_block_size != XEN_BLKIF_SECTOR_SIZE) { - error_setg(errp, "logical_block_size != %u not supported", - XEN_BLKIF_SECTOR_SIZE); - return; - } - if (conf->logical_block_size > conf->physical_block_size) { error_setg( errp, "logical_block_size > physical_block_size not supported"); return; } - blk_set_dev_ops(conf->blk, &xen_block_dev_ops, blockdev); - blk_set_guest_block_size(conf->blk, conf->logical_block_size); + blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev); + blk_set_guest_block_size(blk, conf->logical_block_size); if (conf->discard_granularity == -1) { conf->discard_granularity = conf->physical_block_size; } - if (blk_get_flags(conf->blk) & BDRV_O_UNMAP) { + if (blk_get_flags(blk) & BDRV_O_UNMAP) { xen_device_backend_printf(xendev, "feature-discard", "%u", 1); xen_device_backend_printf(xendev, "discard-granularity", "%u", conf->discard_granularity); @@ -260,12 +269,13 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) blockdev->device_type); xen_device_backend_printf(xendev, "sector-size", "%u", - XEN_BLKIF_SECTOR_SIZE); + conf->logical_block_size); xen_block_set_size(blockdev); blockdev->dataplane = - xen_block_dataplane_create(xendev, conf, blockdev->props.iothread); + xen_block_dataplane_create(xendev, blk, conf->logical_block_size, + blockdev->props.iothread); } static void xen_block_frontend_changed(XenDevice *xendev, diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h index a353693ea0..99733529c1 100644 --- a/hw/block/xen_blkif.h +++ b/hw/block/xen_blkif.h @@ -1,9 +1,8 @@ #ifndef XEN_BLKIF_H #define XEN_BLKIF_H -#include "hw/xen/io/ring.h" -#include <xen/io/blkif.h> -#include <xen/io/protocols.h> +#include "hw/xen/interface/io/blkif.h" +#include "hw/xen/interface/io/protocols.h" /* * Not a real protocol. Used to generate ring structs which contain diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 91f34ef06c..47e1092263 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -28,7 +28,7 @@ #include "chardev/char-fe.h" #include "hw/xen/xen-legacy-backend.h" -#include <xen/io/console.h> +#include "hw/xen/interface/io/console.h" struct buffer { uint8_t *data; diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 6202f1150e..3c79913b31 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -32,10 +32,9 @@ #include "ui/console.h" #include "hw/xen/xen-legacy-backend.h" -#include <xen/event_channel.h> -#include <xen/io/fbif.h> -#include <xen/io/kbdif.h> -#include <xen/io/protocols.h> +#include "hw/xen/interface/io/fbif.h" +#include "hw/xen/interface/io/kbdif.h" +#include "hw/xen/interface/io/protocols.h" #include "trace.h" diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c index 2939122e7c..469f1260a4 100644 --- a/hw/i386/xen/xen-hvm.c +++ b/hw/i386/xen/xen-hvm.c @@ -27,7 +27,6 @@ #include "exec/address-spaces.h" #include <xen/hvm/ioreq.h> -#include <xen/hvm/params.h> #include <xen/hvm/e820.h> //#define DEBUG_XEN_HVM @@ -120,6 +119,8 @@ typedef struct XenIOState { DeviceListener device_listener; hwaddr free_phys_offset; const XenPhysmap *log_for_dirtybit; + /* Buffer used by xen_sync_dirty_bitmap */ + unsigned long *dirty_bitmap; Notifier exit; Notifier suspend; @@ -465,6 +466,8 @@ static int xen_remove_from_physmap(XenIOState *state, QLIST_REMOVE(physmap, list); if (state->log_for_dirtybit == physmap) { state->log_for_dirtybit = NULL; + g_free(state->dirty_bitmap); + state->dirty_bitmap = NULL; } g_free(physmap); @@ -615,7 +618,7 @@ static void xen_sync_dirty_bitmap(XenIOState *state, { hwaddr npages = size >> TARGET_PAGE_BITS; const int width = sizeof(unsigned long) * 8; - unsigned long bitmap[DIV_ROUND_UP(npages, width)]; + size_t bitmap_size = DIV_ROUND_UP(npages, width); int rc, i, j; const XenPhysmap *physmap = NULL; @@ -627,13 +630,14 @@ static void xen_sync_dirty_bitmap(XenIOState *state, if (state->log_for_dirtybit == NULL) { state->log_for_dirtybit = physmap; + state->dirty_bitmap = g_new(unsigned long, bitmap_size); } else if (state->log_for_dirtybit != physmap) { /* Only one range for dirty bitmap can be tracked. */ return; } rc = xen_track_dirty_vram(xen_domid, start_addr >> TARGET_PAGE_BITS, - npages, bitmap); + npages, state->dirty_bitmap); if (rc < 0) { #ifndef ENODATA #define ENODATA ENOENT @@ -647,8 +651,8 @@ static void xen_sync_dirty_bitmap(XenIOState *state, return; } - for (i = 0; i < ARRAY_SIZE(bitmap); i++) { - unsigned long map = bitmap[i]; + for (i = 0; i < bitmap_size; i++) { + unsigned long map = state->dirty_bitmap[i]; while (map != 0) { j = ctzl(map); map &= ~(1ul << j); @@ -678,6 +682,8 @@ static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section, if (old & ~new & (1 << DIRTY_MEMORY_VGA)) { state->log_for_dirtybit = NULL; + g_free(state->dirty_bitmap); + state->dirty_bitmap = NULL; /* Disable dirty bit tracking */ xen_track_dirty_vram(xen_domid, 0, 0, NULL); } diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c index 254759f776..dc73c86c61 100644 --- a/hw/i386/xen/xen-mapcache.c +++ b/hw/i386/xen/xen-mapcache.c @@ -17,8 +17,6 @@ #include "hw/xen/xen-legacy-backend.h" #include "qemu/bitmap.h" -#include <xen/hvm/params.h> - #include "sysemu/xen-mapcache.h" #include "trace.h" diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 37cda8e4be..ffb3b5898d 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -30,7 +30,7 @@ #include "net/util.h" #include "hw/xen/xen-legacy-backend.h" -#include <xen/io/netif.h> +#include "hw/xen/interface/io/netif.h" /* ------------------------------------------------------------- */ diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index b20d0cfadf..dfbb418e77 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -32,8 +32,7 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" -#include "hw/xen/io/ring.h" -#include <xen/io/usbif.h> +#include "hw/xen/interface/io/usbif.h" /* * Check for required support of usbif.h: USBIF_SHORT_NOT_OK was the last diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c index a4416d0bcf..7503eea9e9 100644 --- a/hw/xen/xen-bus.c +++ b/hw/xen/xen-bus.c @@ -924,23 +924,35 @@ done: } struct XenEventChannel { + QLIST_ENTRY(XenEventChannel) list; + AioContext *ctx; + xenevtchn_handle *xeh; evtchn_port_t local_port; XenEventHandler handler; void *opaque; - Notifier notifier; }; -static void event_notify(Notifier *n, void *data) +static bool xen_device_poll(void *opaque) +{ + XenEventChannel *channel = opaque; + + return channel->handler(channel->opaque); +} + +static void xen_device_event(void *opaque) { - XenEventChannel *channel = container_of(n, XenEventChannel, notifier); - unsigned long port = (unsigned long)data; + XenEventChannel *channel = opaque; + unsigned long port = xenevtchn_pending(channel->xeh); if (port == channel->local_port) { - channel->handler(channel->opaque); + xen_device_poll(channel); + + xenevtchn_unmask(channel->xeh, port); } } XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, + AioContext *ctx, unsigned int port, XenEventHandler handler, void *opaque, Error **errp) @@ -948,24 +960,40 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, XenEventChannel *channel = g_new0(XenEventChannel, 1); xenevtchn_port_or_error_t local_port; - local_port = xenevtchn_bind_interdomain(xendev->xeh, + channel->xeh = xenevtchn_open(NULL, 0); + if (!channel->xeh) { + error_setg_errno(errp, errno, "failed xenevtchn_open"); + goto fail; + } + + local_port = xenevtchn_bind_interdomain(channel->xeh, xendev->frontend_id, port); if (local_port < 0) { error_setg_errno(errp, errno, "xenevtchn_bind_interdomain failed"); - - g_free(channel); - return NULL; + goto fail; } channel->local_port = local_port; channel->handler = handler; channel->opaque = opaque; - channel->notifier.notify = event_notify; - notifier_list_add(&xendev->event_notifiers, &channel->notifier); + channel->ctx = ctx; + aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + xen_device_event, NULL, xen_device_poll, channel); + + QLIST_INSERT_HEAD(&xendev->event_channels, channel, list); return channel; + +fail: + if (channel->xeh) { + xenevtchn_close(channel->xeh); + } + + g_free(channel); + + return NULL; } void xen_device_notify_event_channel(XenDevice *xendev, @@ -977,7 +1005,7 @@ void xen_device_notify_event_channel(XenDevice *xendev, return; } - if (xenevtchn_notify(xendev->xeh, channel->local_port) < 0) { + if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_notify failed"); } } @@ -991,12 +1019,16 @@ void xen_device_unbind_event_channel(XenDevice *xendev, return; } - notifier_remove(&channel->notifier); + QLIST_REMOVE(channel, list); + + aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + NULL, NULL, NULL, NULL); - if (xenevtchn_unbind(xendev->xeh, channel->local_port) < 0) { + if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { error_setg_errno(errp, errno, "xenevtchn_unbind failed"); } + xenevtchn_close(channel->xeh); g_free(channel); } @@ -1005,6 +1037,7 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp) XenDevice *xendev = XEN_DEVICE(dev); XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); const char *type = object_get_typename(OBJECT(xendev)); + XenEventChannel *channel, *next; if (!xendev->name) { return; @@ -1021,15 +1054,14 @@ static void xen_device_unrealize(DeviceState *dev, Error **errp) xendev_class->unrealize(xendev, errp); } + /* Make sure all event channels are cleaned up */ + QLIST_FOREACH_SAFE(channel, &xendev->event_channels, list, next) { + xen_device_unbind_event_channel(xendev, channel, NULL); + } + xen_device_frontend_destroy(xendev); xen_device_backend_destroy(xendev); - if (xendev->xeh) { - qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), NULL, NULL, NULL); - xenevtchn_close(xendev->xeh); - xendev->xeh = NULL; - } - if (xendev->xgth) { xengnttab_close(xendev->xgth); xendev->xgth = NULL; @@ -1046,16 +1078,6 @@ static void xen_device_exit(Notifier *n, void *data) xen_device_unrealize(DEVICE(xendev), &error_abort); } -static void xen_device_event(void *opaque) -{ - XenDevice *xendev = opaque; - unsigned long port = xenevtchn_pending(xendev->xeh); - - notifier_list_notify(&xendev->event_notifiers, (void *)port); - - xenevtchn_unmask(xendev->xeh, port); -} - static void xen_device_realize(DeviceState *dev, Error **errp) { XenDevice *xendev = XEN_DEVICE(dev); @@ -1096,16 +1118,6 @@ static void xen_device_realize(DeviceState *dev, Error **errp) xendev->feature_grant_copy = (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0); - xendev->xeh = xenevtchn_open(NULL, 0); - if (!xendev->xeh) { - error_setg_errno(errp, errno, "failed xenevtchn_open"); - goto unrealize; - } - - notifier_list_init(&xendev->event_notifiers); - qemu_set_fd_handler(xenevtchn_fd(xendev->xeh), xen_device_event, NULL, - xendev); - xen_device_backend_create(xendev, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c index 36fd1e9b09..3715c94fa6 100644 --- a/hw/xen/xen-legacy-backend.c +++ b/hw/xen/xen-legacy-backend.c @@ -34,8 +34,6 @@ #include "hw/xen/xen_pvdev.h" #include "monitor/qdev.h" -#include <xen/grant_table.h> - DeviceState *xen_sysdev; BusState *xen_sysbus; diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h new file mode 100644 index 0000000000..2af0cbdde3 --- /dev/null +++ b/include/hw/xen/interface/grant_table.h @@ -0,0 +1,36 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h new file mode 100644 index 0000000000..8b1be50ce8 --- /dev/null +++ b/include/hw/xen/interface/io/blkif.h @@ -0,0 +1,712 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + * Copyright (c) 2012, Spectra Logic Corporation + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen block driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * All data in the XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + * XenStore nodes marked "DEPRECATED" in their notes section should only be + * used to provide interoperability with legacy implementations. + * + * See the XenBus state transition diagram below for details on when XenBus + * nodes must be published and when they can be queried. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * mode + * Values: "r" (read only), "w" (writable) + * + * The read or write access permissions to the backing store to be + * granted to the frontend. + * + * params + * Values: string + * + * A free formatted string providing sufficient information for the + * hotplug script to attach the device and provide a suitable + * handler (ie: a block device) for blkback to use. + * + * physical-device + * Values: "MAJOR:MINOR" + * Notes: 11 + * + * MAJOR and MINOR are the major number and minor number of the + * backing device respectively. + * + * physical-device-path + * Values: path string + * + * A string that contains the absolute path to the disk image. On + * NetBSD and Linux this is always a block device, while on FreeBSD + * it can be either a block device or a regular file. + * + * type + * Values: "file", "phy", "tap" + * + * The type of the backing device/object. + * + * + * direct-io-safe + * Values: 0/1 (boolean) + * Default Value: 0 + * + * The underlying storage is not affected by the direct IO memory + * lifetime bug. See: + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html + * + * Therefore this option gives the backend permission to use + * O_DIRECT, notwithstanding that bug. + * + * That is, if this option is enabled, use of O_DIRECT is safe, + * in circumstances where we would normally have avoided it as a + * workaround for that bug. This option is not relevant for all + * backends, and even not necessarily supported for those for + * which it is relevant. A backend which knows that it is not + * affected by the bug can ignore this option. + * + * This option doesn't require a backend to use O_DIRECT, so it + * should not be used to try to control the caching behaviour. + * + *--------------------------------- Features --------------------------------- + * + * feature-barrier + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-flush-cache + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-discard + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the backend can process requests + * containing the BLKIF_OP_DISCARD request opcode. Requests + * of this type may still be returned at any time with the + * BLKIF_RSP_EOPNOTSUPP result code. + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7 + * + * A value of "1" indicates that the backend can keep the grants used + * by the frontend driver mapped, so the same set of grants should be + * used in all transactions. The maximum number of grants the backend + * can map persistently depends on the implementation, but ideally it + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this + * feature the backend doesn't need to unmap each grant, preventing + * costly TLB flushes. The backend driver should only map grants + * persistently if the frontend supports it. If a backend driver chooses + * to use the persistent protocol when the frontend doesn't support it, + * it will probably hit the maximum number of persistently mapped grants + * (due to the fact that the frontend won't be reusing the same grants), + * and fall back to non-persistent mode. Backend implementations may + * shrink or expand the number of persistently mapped grants without + * notifying the frontend depending on memory constraints (this might + * cause a performance degradation). + * + * If a backend driver wants to limit the maximum number of persistently + * mapped grants to a value less than RING_SIZE * + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to + * discard the grants that are less commonly used. Using a LRU in the + * backend driver paired with a LIFO queue in the frontend will + * allow us to have better performance in this scenario. + * + *----------------------- Request Transport Parameters ------------------------ + * + * max-ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Notes: 1, 3 + * + * The maximum supported size of the request ring buffer in units of + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * max-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Notes: DEPRECATED, 2, 3 + * + * The maximum supported size of the request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *------------------------- Backend Device Properties ------------------------- + * + * discard-enable + * Values: 0/1 (boolean) + * Default Value: 1 + * + * This optional property, set by the toolstack, instructs the backend + * to offer (or not to offer) discard to the frontend. If the property + * is missing the backend should offer discard if the backing storage + * actually supports it. + * + * discard-alignment + * Values: <uint32_t> + * Default Value: 0 + * Notes: 4, 5 + * + * The offset, in bytes from the beginning of the virtual block device, + * to the first, addressable, discard extent on the underlying device. + * + * discard-granularity + * Values: <uint32_t> + * Default Value: <"sector-size"> + * Notes: 4 + * + * The size, in bytes, of the individually addressable discard extents + * of the underlying device. + * + * discard-secure + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 10 + * + * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD + * requests with the BLKIF_DISCARD_SECURE flag set. + * + * info + * Values: <uint32_t> (bitmap) + * + * A collection of bit flags describing attributes of the backing + * device. The VDISK_* macros define the meaning of each bit + * location. + * + * sector-size + * Values: <uint32_t> + * + * The logical block size, in bytes, of the underlying storage. This + * must be a power of two with a minimum value of 512. + * + * NOTE: Because of implementation bugs in some frontends this must be + * set to 512, unless the frontend advertizes a non-zero value + * in its "feature-large-sector-size" xenbus node. (See below). + * + * physical-sector-size + * Values: <uint32_t> + * Default Value: <"sector-size"> + * + * The physical block size, in bytes, of the backend storage. This + * must be an integer multiple of "sector-size". + * + * sectors + * Values: <uint64_t> + * + * The size of the backend device, expressed in units of "sector-size". + * The product of "sector-size" and "sectors" must also be an integer + * multiple of "physical-sector-size", if that node is present. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: <uint32_t> + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * ring-ref + * Values: <uint32_t> + * Notes: 6 + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. + * + * ring-ref%u + * Values: <uint32_t> + * Notes: 6 + * + * For a frontend providing a multi-page ring, a "number of ring pages" + * sized list of nodes, each containing a Xen grant reference granting + * permission for the backend to map the page of the ring located + * at page index "%u". Page indexes are zero based. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + * ring-page-order + * Values: <uint32_t> + * Default Value: 0 + * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) + * Notes: 1, 3 + * + * The size of the frontend allocated request ring buffer in units + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, + * etc.). + * + * num-ring-pages + * Values: <uint32_t> + * Default Value: 1 + * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) + * Notes: DEPRECATED, 2, 3 + * + * The size of the frontend allocated request ring buffer in units of + * machine pages. The value must be a power of 2. + * + *--------------------------------- Features --------------------------------- + * + * feature-persistent + * Values: 0/1 (boolean) + * Default Value: 0 + * Notes: 7, 8, 9 + * + * A value of "1" indicates that the frontend will reuse the same grants + * for all transactions, allowing the backend to map them with write + * access (even when it should be read-only). If the frontend hits the + * maximum number of allowed persistently mapped grants, it can fallback + * to non persistent mode. This will cause a performance degradation, + * since the the backend driver will still try to map those grants + * persistently. Since the persistent grants protocol is compatible with + * the previous protocol, a frontend driver can choose to work in + * persistent mode even when the backend doesn't support it. + * + * It is recommended that the frontend driver stores the persistently + * mapped grants in a LIFO queue, so a subset of all persistently mapped + * grants gets used commonly. This is done in case the backend driver + * decides to limit the maximum number of persistently mapped grants + * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + * + * feature-large-sector-size + * Values: 0/1 (boolean) + * Default Value: 0 + * + * A value of "1" indicates that the frontend will correctly supply and + * interpret all sector-based quantities in terms of the "sector-size" + * value supplied in the backend info, whatever that may be set to. + * If this node is not present or its value is "0" then it is assumed + * that the frontend requires that the logical block size is 512 as it + * is hardcoded (which is the case in some frontend implementations). + * + *------------------------- Virtual Device Properties ------------------------- + * + * device-type + * Values: "disk", "cdrom", "floppy", etc. + * + * virtual-device + * Values: <uint32_t> + * + * A value indicating the physical device to virtualize within the + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI + * disk", etc.) + * + * See docs/misc/vbd-interface.txt for details on the format of this + * value. + * + * Notes + * ----- + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer + * PV drivers. + * (2) Multi-page ring buffer scheme first used in some RedHat distributions + * including a distribution deployed on certain nodes of the Amazon + * EC2 cluster. + * (3) Support for multi-page ring buffers was implemented independently, + * in slightly different forms, by both Citrix and RedHat/Amazon. + * For full interoperability, block front and backends should publish + * identical ring parameters, adjusted for unit differences, to the + * XenStore nodes used in both schemes. + * (4) Devices that support discard functionality may internally allocate space + * (discardable extents) in units that are larger than the exported logical + * block size. If the backing device has such discardable extents the + * backend should provide both discard-granularity and discard-alignment. + * Providing just one of the two may be considered an error by the frontend. + * Backends supporting discard should include discard-granularity and + * discard-alignment even if it supports discarding individual sectors. + * Frontends should assume discard-alignment == 0 and discard-granularity + * == sector size if these keys are missing. + * (5) The discard-alignment parameter allows a physical device to be + * partitioned into virtual devices that do not necessarily begin or + * end on a discardable extent boundary. + * (6) When there is only a single page allocated to the request ring, + * 'ring-ref' is used to communicate the grant reference for this + * page to the backend. When using a multi-page ring, the 'ring-ref' + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. + * (7) When using persistent grants data has to be copied from/to the page + * where the grant is currently mapped. The overhead of doing this copy + * however doesn't suppress the speed improvement of not having to unmap + * the grants. + * (8) The frontend driver has to allow the backend driver to map all grants + * with write access, even when they should be mapped read-only, since + * further requests may reuse these grants and require write permissions. + * (9) Linux implementation doesn't have a limit on the maximum number of + * grants that can be persistently mapped in the frontend driver, but + * due to the frontent driver implementation it should never be bigger + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. + *(10) The discard-secure property may be present and will be set to 1 if the + * backing device supports secure discard. + *(11) Only used by Linux and NetBSD. + */ + +/* + * Multiple hardware queues/rings: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vbd, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues" with the number they wish to use, which must be + * greater than zero, and no more than the value reported by the backend in + * "multi-queue-max-queues". + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel and ring-ref keys, instead writing those keys under sub-keys + * having the name "queue-N" where N is the integer ID of the queue/ring for + * which those keys belong. Queues are indexed from zero. + * For example, a frontend with two queues must write the following set of + * queue-related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + * It is also possible to use multiple queues/rings together with + * feature multi-page ring buffer. + * For example, a frontend requests two queues/rings and the size of each ring + * buffer is two pages must write the following set of related keys: + * + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vbd/0/ring-page-order = "1" + * /local/domain/1/device/vbd/0/queue-0 = "" + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" + * /local/domain/1/device/vbd/0/queue-1 = "" + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" + * + */ + +/* + * STATE DIAGRAMS + * + ***************************************************************************** + * Startup * + ***************************************************************************** + * + * Tool stack creates front and back nodes with state XenbusStateInitialising. + * + * Front Back + * ================================= ===================================== + * XenbusStateInitialising XenbusStateInitialising + * o Query virtual device o Query backend device identification + * properties. data. + * o Setup OS device instance. o Open and validate backend device. + * o Publish backend features and + * transport parameters. + * | + * | + * V + * XenbusStateInitWait + * + * o Query backend features and + * transport parameters. + * o Allocate and initialize the + * request ring. + * o Publish transport parameters + * that will be in effect during + * this connection. + * | + * | + * V + * XenbusStateInitialised + * + * o Query frontend transport parameters. + * o Connect to the request ring and + * event channel. + * o Publish backend device properties. + * | + * | + * V + * XenbusStateConnected + * + * o Query backend device properties. + * o Finalize OS virtual device + * instance. + * | + * | + * V + * XenbusStateConnected + * + * Note: Drivers that do not support any optional features, or the negotiation + * of transport parameters, can skip certain states in the state machine: + * + * o A frontend may transition to XenbusStateInitialised without + * waiting for the backend to enter XenbusStateInitWait. In this + * case, default transport parameters are in effect and any + * transport parameters published by the frontend must contain + * their default values. + * + * o A backend may transition to XenbusStateInitialised, bypassing + * XenbusStateInitWait, without waiting for the frontend to first + * enter the XenbusStateInitialised state. In this case, default + * transport parameters are in effect and any transport parameters + * published by the backend must contain their default values. + * + * Drivers that support optional features and/or transport parameter + * negotiation must tolerate these additional state transition paths. + * In general this means performing the work of any skipped state + * transition, if it has not already been performed, in addition to the + * work associated with entry into the current state. + */ + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER + * operation code ("barrier request") must be completed prior to the + * execution of the barrier request. All writes issued after the barrier + * request must not execute until after the completion of the barrier request. + * + * Optional. See "feature-barrier" XenBus node documentation above. + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Commit any uncommitted contents of the backing device's volatile cache + * to stable storage. + * + * Optional. See "feature-flush-cache" XenBus node documentation above. + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Indicate to the backend device that a region of storage is no longer in + * use, and may be discarded at any time without impact to the client. If + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the + * discarded region on the device must be rendered unrecoverable before the + * command returns. + * + * This operation is analogous to performing a trim (ATA) or unamp (SCSI), + * command on a native device. + * + * More information about trim/unmap operations can be found at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + * Interface%20manuals/100293068c.pdf + * + * Optional. See "feature-discard", "discard-alignment", + * "discard-granularity", and "discard-secure" in the XenBus node + * documentation above. + */ +#define BLKIF_OP_DISCARD 5 + +/* + * Recognized if "feature-max-indirect-segments" in present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend. The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT 6 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +/* + * Maximum number of indirect pages to use per request. + */ +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +/* + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as + * 'sector_number' in blkif_request, blkif_request_discard and + * blkif_request_indirect are sector-based quantities. See the description + * of the "feature-large-sector-size" frontend xenbus node above for + * more information. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + uint8_t operation; /* BLKIF_OP_DISCARD */ + uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to discard*/ +}; +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_request_indirect { + uint8_t operation; /* BLKIF_OP_INDIRECT */ + uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ + uint16_t nr_segments; /* number of segments */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + uint64_t pad; /* Make it 64 byte aligned on i386 */ +#endif +}; +typedef struct blkif_request_indirect blkif_request_indirect_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h new file mode 100644 index 0000000000..e2155d1cf5 --- /dev/null +++ b/include/hw/xen/interface/io/console.h @@ -0,0 +1,46 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#ifdef XEN_WANT_FLEX_CONSOLE_RING +#include "ring.h" +DEFINE_XEN_FLEX_RING(xencons); +#endif + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h new file mode 100644 index 0000000000..ea87ebec0a --- /dev/null +++ b/include/hw/xen/interface/io/fbif.h @@ -0,0 +1,156 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend their preferred period of + * refresh. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodical refreshs. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +#endif diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h new file mode 100644 index 0000000000..1d68cd458e --- /dev/null +++ b/include/hw/xen/interface/io/kbdif.h @@ -0,0 +1,566 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com> + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* + ***************************************************************************** + * Feature and Parameter Negotiation + ***************************************************************************** + * + * The two halves of a para-virtual driver utilize nodes within + * XenStore to communicate capabilities and to negotiate operating parameters. + * This section enumerates these nodes which reside in the respective front and + * backend portions of XenStore, following XenBus convention. + * + * All data in XenStore is stored as strings. Nodes specifying numeric + * values are encoded in decimal. Integer value ranges listed below are + * expressed as fixed sized integer types capable of storing the conversion + * of a properly formated node string, without loss of information. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *---------------------------- Features supported ---------------------------- + * + * Capable backend advertises supported features by publishing + * corresponding entries in XenStore and puts 1 as the value of the entry. + * If a feature is not supported then 0 must be set or feature entry omitted. + * + * feature-disable-keyboard + * Values: <uint> + * + * If there is no need to expose a virtual keyboard device by the + * frontend then this must be set to 1. + * + * feature-disable-pointer + * Values: <uint> + * + * If there is no need to expose a virtual pointer device by the + * frontend then this must be set to 1. + * + * feature-abs-pointer + * Values: <uint> + * + * Backends, which support reporting of absolute coordinates for pointer + * device should set this to 1. + * + * feature-multi-touch + * Values: <uint> + * + * Backends, which support reporting of multi-touch events + * should set this to 1. + * + * feature-raw-pointer + * Values: <uint> + * + * Backends, which support reporting raw (unscaled) absolute coordinates + * for pointer devices should set this to 1. Raw (unscaled) values have + * a range of [0, 0x7fff]. + * + *----------------------- Device Instance Parameters ------------------------ + * + * unique-id + * Values: <string> + * + * After device instance initialization it is assigned a unique ID, + * so every instance of the frontend can be identified by the backend + * by this ID. This can be UUID or such. + * + *------------------------- Pointer Device Parameters ------------------------ + * + * width + * Values: <uint> + * + * Maximum X coordinate (width) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * height + * Values: <uint> + * + * Maximum Y coordinate (height) to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + *----------------------- Multi-touch Device Parameters ---------------------- + * + * multi-touch-num-contacts + * Values: <uint> + * + * Number of simultaneous touches reported. + * + * multi-touch-width + * Values: <uint> + * + * Width of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * multi-touch-height + * Values: <uint> + * + * Height of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *------------------------------ Feature request ----------------------------- + * + * Capable frontend requests features from backend via setting corresponding + * entries to 1 in XenStore. Requests for features not advertised as supported + * by the backend have no effect. + * + * request-abs-pointer + * Values: <uint> + * + * Request backend to report absolute pointer coordinates + * (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION). + * + * request-multi-touch + * Values: <uint> + * + * Request backend to report multi-touch events. + * + * request-raw-pointer + * Values: <uint> + * + * Request backend to report raw unscaled absolute pointer coordinates. + * This option is only valid if request-abs-pointer is also set. + * Raw unscaled coordinates have the range [0, 0x7fff] + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: <uint> + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * page-gref + * Values: <uint> + * + * The Xen grant reference granting permission for the backend to map + * a sole page in a single page sized event ring buffer. + * + * page-ref + * Values: <uint> + * + * OBSOLETE, not recommended for use. + * PFN of the shared page. + */ + +/* + * EVENT CODES. + */ + +#define XENKBD_TYPE_MOTION 1 +#define XENKBD_TYPE_RESERVED 2 +#define XENKBD_TYPE_KEY 3 +#define XENKBD_TYPE_POS 4 +#define XENKBD_TYPE_MTOUCH 5 + +/* Multi-touch event sub-codes */ + +#define XENKBD_MT_EV_DOWN 0 +#define XENKBD_MT_EV_UP 1 +#define XENKBD_MT_EV_MOTION 2 +#define XENKBD_MT_EV_SYN 3 +#define XENKBD_MT_EV_SHAPE 4 +#define XENKBD_MT_EV_ORIENT 5 + +/* + * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS. + */ + +#define XENKBD_DRIVER_NAME "vkbd" + +#define XENKBD_FIELD_FEAT_DSBL_KEYBRD "feature-disable-keyboard" +#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer" +#define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer" +#define XENKBD_FIELD_FEAT_RAW_POINTER "feature-raw-pointer" +#define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch" +#define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer" +#define XENKBD_FIELD_REQ_RAW_POINTER "request-raw-pointer" +#define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch" +#define XENKBD_FIELD_RING_GREF "page-gref" +#define XENKBD_FIELD_EVT_CHANNEL "event-channel" +#define XENKBD_FIELD_WIDTH "width" +#define XENKBD_FIELD_HEIGHT "height" +#define XENKBD_FIELD_MT_WIDTH "multi-touch-width" +#define XENKBD_FIELD_MT_HEIGHT "multi-touch-height" +#define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts" +#define XENKBD_FIELD_UNIQUE_ID "unique-id" + +/* OBSOLETE, not recommended for use */ +#define XENKBD_FIELD_RING_REF "page-ref" + +/* + ***************************************************************************** + * Description of the protocol between frontend and backend driver. + ***************************************************************************** + * + * The two halves of a Para-virtual driver communicate with + * each other using a shared page and an event channel. + * Shared page contains a ring with event structures. + * + * All reserved fields in the structures below must be 0. + * + ***************************************************************************** + * Backend to frontend events + ***************************************************************************** + * + * Frontends should ignore unknown in events. + * All event packets have the same length (40 octets) + * All event packets have common header: + * + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code, XENKBD_TYPE_??? + * + * + * Pointer relative movement event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MOTION | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | rel_x | 8 + * +----------------+----------------+----------------+----------------+ + * | rel_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * rel_x - int32_t, relative X motion + * rel_y - int32_t, relative Y motion + * rel_z - int32_t, relative Z motion (wheel) + */ + +struct xenkbd_motion +{ + uint8_t type; + int32_t rel_x; + int32_t rel_y; + int32_t rel_z; +}; + +/* + * Key event (includes pointer buttons) + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_KEY | pressed | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | keycode | 8 + * +----------------+----------------+----------------+----------------+ + * | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * pressed - uint8_t, 1 if pressed; 0 otherwise + * keycode - uint32_t, KEY_* from linux/input.h + */ + +struct xenkbd_key +{ + uint8_t type; + uint8_t pressed; + uint32_t keycode; +}; + +/* + * Pointer absolute position event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_POS | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 12 + * +----------------+----------------+----------------+----------------+ + * | rel_z | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position (in FB pixels) + * abs_y - int32_t, absolute Y position (in FB pixels) + * rel_z - int32_t, relative Z motion (wheel) + */ + +struct xenkbd_position +{ + uint8_t type; + int32_t abs_x; + int32_t abs_y; + int32_t rel_z; +}; + +/* + * Multi-touch event and its sub-types + * + * All multi-touch event packets have common header: + * + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | event_type | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * + * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_??? + * contact_id - unt8_t, ID of the contact + * + * Touch interactions can consist of one or more contacts. + * For each contact, a series of events is generated, starting + * with a down event, followed by zero or more motion events, + * and ending with an up event. Events relating to the same + * contact point can be identified by the ID of the sequence: contact ID. + * Contact ID may be reused after XENKBD_MT_EV_UP event and + * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range. + * + * For further information please refer to documentation on Wayland [1], + * Linux [2] and Windows [3] multi-touch support. + * + * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml + * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt + * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx + * + * + * Multi-touch down event - sent when a new touch is made: touch is assigned + * a unique contact ID, sent with this and consequent events related + * to this touch. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_DOWN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels + * abs_y - int32_t, absolute Y position, in pixels + * + * Multi-touch contact release event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_UP | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch motion event + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_MOTION | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | abs_x | 12 + * +----------------+----------------+----------------+----------------+ + * | abs_y | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * abs_x - int32_t, absolute X position, in pixels, + * abs_y - int32_t, absolute Y position, in pixels, + * + * Multi-touch input synchronization event - shows end of a set of events + * which logically belong together. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SYN | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * Multi-touch shape event - touch point's shape has changed its shape. + * Shape is approximated by an ellipse through the major and minor axis + * lengths: major is the longer diameter of the ellipse and minor is the + * shorter one. Center of the ellipse is reported via + * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_SHAPE | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | major | 12 + * +----------------+----------------+----------------+----------------+ + * | minor | 16 + * +----------------+----------------+----------------+----------------+ + * | reserved | 20 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * major - unt32_t, length of the major axis, pixels + * minor - unt32_t, length of the minor axis, pixels + * + * Multi-touch orientation event - touch point's shape has changed + * its orientation: calculated as a clockwise angle between the major axis + * of the ellipse and positive Y axis in degrees, [-180; +180]. + * 0 1 2 3 octet + * +----------------+----------------+----------------+----------------+ + * | _TYPE_MTOUCH | _MT_EV_ORIENT | contact_id | reserved | 4 + * +----------------+----------------+----------------+----------------+ + * | reserved | 8 + * +----------------+----------------+----------------+----------------+ + * | orientation | reserved | 12 + * +----------------+----------------+----------------+----------------+ + * | reserved | 16 + * +----------------+----------------+----------------+----------------+ + * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/| + * +----------------+----------------+----------------+----------------+ + * | reserved | 40 + * +----------------+----------------+----------------+----------------+ + * + * orientation - int16_t, clockwise angle of the major axis + */ + +struct xenkbd_mtouch { + uint8_t type; /* XENKBD_TYPE_MTOUCH */ + uint8_t event_type; /* XENKBD_MT_EV_??? */ + uint8_t contact_id; + uint8_t reserved[5]; /* reserved for the future use */ + union { + struct { + int32_t abs_x; /* absolute X position, pixels */ + int32_t abs_y; /* absolute Y position, pixels */ + } pos; + struct { + uint32_t major; /* length of the major axis, pixels */ + uint32_t minor; /* length of the minor axis, pixels */ + } shape; + int16_t orientation; /* clockwise angle of the major axis */ + } u; +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + struct xenkbd_mtouch mtouch; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* + ***************************************************************************** + * Frontend to backend events + ***************************************************************************** + * + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined. + + * All event packets have the same length (40 octets) + * All event packets have common header: + * 0 octet + * +-----------------+ + * | type | + * +-----------------+ + * type - uint8_t, event code + */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* + ***************************************************************************** + * Shared page + ***************************************************************************** + */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */ diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h new file mode 100644 index 0000000000..48fa530950 --- /dev/null +++ b/include/hw/xen/interface/io/netif.h @@ -0,0 +1,1010 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18 + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels. + * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1" + * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>" + * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>" + * + * If there is any inconsistency in the XenStore data, the backend may + * choose not to connect any queues, instead treating the request as an + * error. This includes scenarios where more (or fewer) queues were + * requested than the frontend provided details for. + * + * Mapping of packets to queues is considered to be a function of the + * transmitting system (backend or frontend) and is not negotiated + * between the two. Guests are free to transmit packets on any queue + * they choose, provided it has been set up correctly. Guests must be + * prepared to receive packets on any queue they have requested be set up. + */ + +/* + * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum + * offload off or on. If it is missing then the feature is assumed to be on. + * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum + * offload on or off. If it is missing then the feature is assumed to be off. + */ + +/* + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither + * frontends nor backends are assumed to be capable unless the flags are + * present. + */ + +/* + * "feature-multicast-control" and "feature-dynamic-multicast-control" + * advertise the capability to filter ethernet multicast packets in the + * backend. If the frontend wishes to take advantage of this feature then + * it may set "request-multicast-control". If the backend only advertises + * "feature-multicast-control" then "request-multicast-control" must be set + * before the frontend moves into the connected state. The backend will + * sample the value on this state transition and any subsequent change in + * value will have no effect. However, if the backend also advertises + * "feature-dynamic-multicast-control" then "request-multicast-control" + * may be set by the frontend at any time. In this case, the backend will + * watch the value and re-sample on watch events. + * + * If the sampled value of "request-multicast-control" is set then the + * backend transmit side should no longer flood multicast packets to the + * frontend, it should instead drop any multicast packet that does not + * match in a filter list. + * The list is amended by the frontend by sending dummy transmit requests + * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as + * specified below. + * Note that the filter list may be amended even if the sampled value of + * "request-multicast-control" is not set, however the filter should only + * be applied if it is set. + */ + +/* + * Control ring + * ============ + * + * Some features, such as hashing (detailed below), require a + * significant amount of out-of-band data to be passed from frontend to + * backend. Use of xenstore is not suitable for large quantities of data + * because of quota limitations and so a dedicated 'control ring' is used. + * The ability of the backend to use a control ring is advertised by + * setting: + * + * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1" + * + * The frontend provides a control ring to the backend by setting: + * + * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref> + * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port> + * + * where <gref> is the grant reference of the shared page used to + * implement the control ring and <port> is an event channel to be used + * as a mailbox interrupt. These keys must be set before the frontend + * moves into the connected state. + * + * The control ring uses a fixed request/response message size and is + * balanced (i.e. one request to one response), so operationally it is much + * the same as a transmit or receive ring. + * Note that there is no requirement that responses are issued in the same + * order as requests. + */ + +/* + * Hash types + * ========== + * + * For the purposes of the definitions below, 'Packet[]' is an array of + * octets containing an IP packet without options, 'Array[X..Y]' means a + * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is + * used to indicate concatenation of arrays. + */ + +/* + * A hash calculated over an IP version 4 header as follows: + * + * Buffer[0..8] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + * + * Result = Hash(Buffer, 8) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) + +/* + * A hash calculated over an IP version 4 header and TCP header as + * follows: + * + * Buffer[0..12] = Packet[12..15] (source address) + + * Packet[16..19] (destination address) + + * Packet[20..21] (source port) + + * Packet[22..23] (destination port) + * + * Result = Hash(Buffer, 12) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) + +/* + * A hash calculated over an IP version 6 header as follows: + * + * Buffer[0..32] = Packet[8..23] (source address ) + + * Packet[24..39] (destination address) + * + * Result = Hash(Buffer, 32) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) + +/* + * A hash calculated over an IP version 6 header and TCP header as + * follows: + * + * Buffer[0..36] = Packet[8..23] (source address) + + * Packet[24..39] (destination address) + + * Packet[40..41] (source port) + + * Packet[42..43] (destination port) + * + * Result = Hash(Buffer, 36) + */ +#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 +#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ + (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) + +/* + * Hash algorithms + * =============== + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 + +/* + * Toeplitz hash: + */ + +#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 + +/* + * Control requests (struct xen_netif_ctrl_request) + * ================================================ + * + * All requests have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | data[0] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data[1] | data[2] | + * +-----+-----+-----+-----+-----------------------+ + * + * id: the request identifier, echoed in response. + * type: the type of request (see below) + * data[]: any data associated with the request (determined by type) + */ + +struct xen_netif_ctrl_request { + uint16_t id; + uint16_t type; + +#define XEN_NETIF_CTRL_TYPE_INVALID 0 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 +#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 +#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 +#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8 +#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING 9 +#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING 10 + + uint32_t data[3]; +}; + +/* + * Control responses (struct xen_netif_ctrl_response) + * ================================================== + * + * All responses have the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | type | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | data | + * +-----+-----+-----+-----+ + * + * id: the corresponding request identifier + * type: the type of the corresponding request + * status: the status of request processing + * data: any data associated with the response (determined by type and + * status) + */ + +struct xen_netif_ctrl_response { + uint16_t id; + uint16_t type; + uint32_t status; + +#define XEN_NETIF_CTRL_STATUS_SUCCESS 0 +#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 +#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 +#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 + + uint32_t data; +}; + +/* + * Static Grants (struct xen_netif_gref) + * ===================================== + * + * A frontend may provide a fixed set of grant references to be mapped on + * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * prior its usage in the command ring allows for creation of these mappings. + * The backend will maintain a fixed amount of these mappings. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many + * of these mappings can be kept. + * + * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has + * the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * grant ref: grant reference (IN) + * flags: flags describing the control operation (IN) + * status: XEN_NETIF_CTRL_STATUS_* (OUT) + * + * 'status' is an output parameter which does not require to be set to zero + * prior to its usage in the corresponding control messages. + */ + +struct xen_netif_gref { + grant_ref_t ref; + uint16_t flags; + +#define _XEN_NETIF_CTRLF_GREF_readonly 0 +#define XEN_NETIF_CTRLF_GREF_readonly (1U<<_XEN_NETIF_CTRLF_GREF_readonly) + + uint16_t status; +}; + +/* + * Control messages + * ================ + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * -------------------------------------- + * + * This is sent by the frontend to set the desired hash algorithm. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM + * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not + * supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables + * hashing and the backend is free to choose how it steers packets + * to queues (which is the default behaviour). + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to query the types of hash supported by + * the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = supported hash types (if operation was successful) + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * ---------------------------------- + * + * This is sent by the frontend to set the types of hash that the backend + * should calculate. (See above for hash type definitions). + * Note that the 'maximal' type of hash should always be chosen. For + * example, if the frontend sets both IPV4 and IPV4_TCP hash types then + * the latter hash type should be calculated for any TCP packet and the + * former only calculated for non-TCP packets. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS + * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag + * value is invalid or + * unsupported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: A valid hash algorithm must be selected before this operation can + * succeed. + * Also, setting data[0] to zero disables hashing and the backend + * is free to choose how it steers packets to queues. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * -------------------------------- + * + * This is sent by the frontend to set the key of the hash if the algorithm + * requires it. (See hash algorithms above). + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY + * data[0] = grant reference of page containing the key (assumed to + * start at beginning of grant) + * data[1] = size of key in octets + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Any key octets not specified are assumed to be zero (the key + * is assumed to be empty by default) and specifying a new key + * invalidates any previous key, hence specifying a key size of + * zero will clear the key (which ensures that the calculated hash + * will always be zero). + * The maximum size of key is algorithm and backend specific, but + * is also limited by the single grant reference. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to query the maximum size of mapping + * table supported by the backend. The size is specified in terms of + * table entries. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE + * data[0] = 0 + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the mapping table + * (if operation was successful) or zero if a mapping table is + * not supported (i.e. hash mapping is done only by modular + * arithmetic). + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * ------------------------------------- + * + * This is sent by the frontend to set the actual size of the mapping + * table to be used by the backend. The size is specified in terms of + * table entries. + * Any previous table is invalidated by this message and any new table + * is assumed to be zero filled. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * data[0] = number of entries in mapping table + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: Setting data[0] to 0 means that hash mapping should be done + * using modular arithmetic. + * + * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * ------------------------------------ + * + * This is sent by the frontend to set the content of the table mapping + * hash value to queue number. The backend should calculate the hash from + * the packet header, use it as an index into the table (modulo the size + * of the table) and then steer the packet to the queue number found at + * that index. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING + * data[0] = grant reference of page containing the mapping (sub-)table + * (assumed to start at beginning of grant) + * data[1] = size of (sub-)table in entries + * data[2] = offset, in entries, of sub-table within overall table + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content + * is invalid + * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger + * than the backend + * supports + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = 0 + * + * NOTE: The overall table has the following format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[0] | mapping[1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | . | + * | . | + * | . | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | mapping[N-2] | mapping[N-1] | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE + * message and each mapping must specifies a queue between 0 and + * "multi-queue-num-queues" (see above). + * The backend may support a mapping table larger than can be + * mapped by a single grant reference. Thus sub-tables within a + * larger table can be individually set by sending multiple messages + * with differing offset values. Specifying a new sub-table does not + * invalidate any table data outside that range. + * The grant reference may be read-only and must remain valid until + * the response has been processed. + * + * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * ----------------------------------------- + * + * This is sent by the frontend to fetch the number of grefs that can be kept + * mapped in the backend. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE + * data[0] = queue index (assumed 0 for single queue) + * data[1] = 0 + * data[2] = 0 + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is + * out of range + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = maximum number of entries allowed in the gref mapping table + * (if operation was successful) or zero if it is not supported. + * + * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to map a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * + * NOTE: Each entry in the input table has the format outlined + * in struct xen_netif_gref. + * Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct + * xen_netif_gref 'status' field is not used and therefore the response + * 'status' determines the success of this operation. In case of + * failure none of grants mappings get added in the backend. + * + * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * ------------------------------------ + * + * This is sent by the frontend for backend to unmap a list of grant + * references. + * + * Request: + * + * type = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING + * data[0] = queue index + * data[1] = grant reference of page containing the mapping list + * (r/w and assumed to start at beginning of page) + * data[2] = size of list in entries + * + * Response: + * + * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not + * supported + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed + * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful + * data = number of entries that were unmapped + * + * NOTE: Each entry in the input table has the format outlined in struct + * xen_netif_gref. + * The struct xen_netif_gref 'status' field determines if the entry + * was successfully removed. + * The entries used are only the ones representing grant references that + * were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING + * operation. Any other entries will have their status set to + * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion. + */ + +DEFINE_RING_TYPES(xen_netif_ctrl, + struct xen_netif_ctrl_request, + struct xen_netif_ctrl_response); + +/* + * Guest transmit + * ============== + * + * This is the 'wire' format for transmit (frontend -> backend) packets: + * + * Fragment 1: netif_tx_request_t - flags = NETTXF_* + * size = total packet size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETTXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_tx_request_t - (only if fragment N-1 flags include + * NETTXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for receive + * (backend -> frontend) packets. Specifically, in a multi-fragment + * packet the actual size of fragment 1 can only be determined by + * subtracting the sizes of fragments 2..N from the total packet size. + * + * Ring slot size is 12 octets, however not all request/response + * structs use the full size. + * + * tx request data (netif_tx_request_t) + * ------------------------------------ + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | grant ref | offset | flags | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | size | + * +-----+-----+-----+-----+ + * + * grant ref: Reference to buffer page. + * offset: Offset within buffer page. + * flags: NETTXF_*. + * id: request identifier, echoed in response. + * size: packet size in bytes. + * + * tx response (netif_tx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | status | unused | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | unused | + * +-----+-----+-----+-----+ + * + * id: reflects id in transmit request + * status: NETIF_RSP_* + * + * Guest receive + * ============= + * + * This is the 'wire' format for receive (backend -> frontend) packets: + * + * Fragment 1: netif_rx_request_t - flags = NETRXF_* + * size = fragment size + * [Extra 1: netif_extra_info_t] - (only if fragment 1 flags include + * NETRXF_extra_info) + * ... + * [Extra N: netif_extra_info_t] - (only if extra N-1 flags include + * XEN_NETIF_EXTRA_MORE) + * ... + * Fragment N: netif_rx_request_t - (only if fragment N-1 flags include + * NETRXF_more_data - flags on preceding + * extras are not relevant here) + * flags = 0 + * size = fragment size + * + * NOTE: + * + * This format slightly is different from that used for transmit + * (frontend -> backend) packets. Specifically, in a multi-fragment + * packet the size of the packet can only be determined by summing the + * sizes of fragments 1..N. + * + * Ring slot size is 8 octets. + * + * rx request (netif_rx_request_t) + * ------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | pad | gref | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: request identifier, echoed in response. + * gref: reference to incoming granted frame. + * + * rx response (netif_rx_response_t) + * --------------------------------- + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | id | offset | flags | status | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * id: reflects id in receive request + * offset: offset in page of start of received packet + * flags: NETRXF_* + * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size. + * + * NOTE: Historically, to support GSO on the frontend receive side, Linux + * netfront does not make use of the rx response id (because, as + * described below, extra info structures overlay the id field). + * Instead it assumes that responses always appear in the same ring + * slot as their corresponding request. Thus, to maintain + * compatibility, backends must make sure this is the case. + * + * Extra Info + * ========== + * + * Can be present if initial request or response has NET{T,R}XF_extra_info, + * or previous extra request has XEN_NETIF_EXTRA_MORE. + * + * The struct therefore needs to fit into either a tx or rx slot and + * is therefore limited to 8 octets. + * + * NOTE: Because extra info data overlays the usual request/response + * structures, there is no id information in the opposite direction. + * So, if an extra info overlays an rx response the frontend can + * assume that it is in the same ring slot as the request that was + * consumed to make the slot available, and the backend must ensure + * this assumption is true. + * + * extra info (netif_extra_info_t) + * ------------------------------- + * + * General format: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| type specific data | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | padding for tx | + * +-----+-----+-----+-----+ + * + * type: XEN_NETIF_EXTRA_TYPE_* + * flags: XEN_NETIF_EXTRA_FLAG_* + * padding for tx: present only in the tx case due to 8 octet limit + * from rx case. Not shown in type specific entries + * below. + * + * XEN_NETIF_EXTRA_TYPE_GSO: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| size |type | pad | features | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_GSO + * flags: XEN_NETIF_EXTRA_FLAG_* + * size: Maximum payload size of each segment. For example, + * for TCP this is just the path MSS. + * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of + * the packet and any extra features required to segment the + * packet properly. + * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO + * features required to process this packet, such as ECN + * support for TCPv4. + * + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags| addr | + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} + * flags: XEN_NETIF_EXTRA_FLAG_* + * addr: address to add/remove + * + * XEN_NETIF_EXTRA_TYPE_HASH: + * + * A backend that supports teoplitz hashing is assumed to accept + * this type of extra info in transmit packets. + * A frontend that enables hashing is assumed to accept + * this type of extra info in receive packets. + * + * 0 1 2 3 4 5 6 7 octet + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * |type |flags|htype| alg |LSB ---- value ---- MSB| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * + * type: Must be XEN_NETIF_EXTRA_TYPE_HASH + * flags: XEN_NETIF_EXTRA_FLAG_* + * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) + * alg: The algorithm used to calculate the hash (one of + * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) + * value: Hash value + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF +struct netif_tx_request { + grant_ref_t gref; + uint16_t offset; + uint16_t flags; + uint16_t id; + uint16_t size; +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ +#define XEN_NETIF_EXTRA_TYPE_MAX (5) + +/* netif_extra_info_t flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types */ +#define XEN_NETIF_GSO_TYPE_NONE (0) +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) +#define XEN_NETIF_GSO_TYPE_TCPV6 (2) + +/* + * This structure needs to fit within both netif_tx_request_t and + * netif_rx_response_t for compatibility. + */ +struct netif_extra_info { + uint8_t type; + uint8_t flags; + union { + struct { + uint16_t size; + uint8_t type; + uint8_t pad; + uint16_t features; + } gso; + struct { + uint8_t addr[6]; + } mcast; + struct { + uint8_t type; + uint8_t algorithm; + uint8_t value[4]; + } hash; + uint16_t pad[3]; + } u; +}; +typedef struct netif_extra_info netif_extra_info_t; + +struct netif_tx_response { + uint16_t id; + int16_t status; +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + uint16_t pad; + grant_ref_t gref; +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +/* Packet has GSO prefix. Deprecated but included for compatibility */ +#define _NETRXF_gso_prefix (4) +#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; + uint16_t flags; + int16_t status; +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */ +#define NETIF_RSP_NULL 1 + +#endif diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h new file mode 100644 index 0000000000..52b4de0f81 --- /dev/null +++ b/include/hw/xen/interface/io/protocols.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2008, Keir Fraser + */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM +#else +# error arch fixup needed here +#endif + +#endif diff --git a/include/hw/xen/io/ring.h b/include/hw/xen/interface/io/ring.h index 62abfd7a6e..1adacf09f9 100644 --- a/include/hw/xen/io/ring.h +++ b/include/hw/xen/interface/io/ring.h @@ -24,8 +24,8 @@ * Tim Deegan and Andrew Warfield November 2004. */ -#ifndef XEN_PUBLIC_IO_RING_H -#define XEN_PUBLIC_IO_RING_H +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ /* * When #include'ing this header, you need to provide the following @@ -469,7 +469,7 @@ struct name##_data_intf { \ }; \ DEFINE_XEN_FLEX_RING(name) -#endif /* XEN_PUBLIC_IO_RING_H */ +#endif /* __XEN_PUBLIC_IO_RING_H__ */ /* * Local variables: diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h new file mode 100644 index 0000000000..c6a58639d6 --- /dev/null +++ b/include/hw/xen/interface/io/usbif.h @@ -0,0 +1,254 @@ +/* + * usbif.h + * + * USB I/O interface for Xen guest OSes. + * + * Copyright (C) 2009, FUJITSU LABORATORIES LTD. + * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_IO_USBIF_H__ +#define __XEN_PUBLIC_IO_USBIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Feature and Parameter Negotiation + * ================================= + * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to + * communicate capabilities and to negotiate operating parameters. This + * section enumerates these nodes which reside in the respective front and + * backend portions of the XenStore, following the XenBus convention. + * + * Any specified default value is in effect if the corresponding XenBus node + * is not present in the XenStore. + * + * XenStore nodes in sections marked "PRIVATE" are solely for use by the + * driver side whose XenBus tree contains them. + * + ***************************************************************************** + * Backend XenBus Nodes + ***************************************************************************** + * + *------------------ Backend Device Identification (PRIVATE) ------------------ + * + * num-ports + * Values: unsigned [1...31] + * + * Number of ports for this (virtual) USB host connector. + * + * usb-ver + * Values: unsigned [1...2] + * + * USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0. + * + * port/[1...31] + * Values: string + * + * Physical USB device connected to the given port, e.g. "3-1.5". + * + ***************************************************************************** + * Frontend XenBus Nodes + ***************************************************************************** + * + *----------------------- Request Transport Parameters ----------------------- + * + * event-channel + * Values: unsigned + * + * The identifier of the Xen event channel used to signal activity + * in the ring buffer. + * + * urb-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for urb requests. + * + * conn-ring-ref + * Values: unsigned + * + * The Xen grant reference granting permission for the backend to map + * the sole page in a single page sized ring buffer. This is the ring + * buffer for connection/disconnection requests. + * + * protocol + * Values: string (XEN_IO_PROTO_ABI_*) + * Default Value: XEN_IO_PROTO_ABI_NATIVE + * + * The machine ABI rules governing the format of all ring request and + * response structures. + * + */ + +enum usb_spec_version { + USB_VER_UNKNOWN = 0, + USB_VER_USB11, + USB_VER_USB20, + USB_VER_USB30, /* not supported yet */ +}; + +/* + * USB pipe in usbif_request + * + * - port number: bits 0-4 + * (USB_MAXCHILDREN is 31) + * + * - operation flag: bit 5 + * (0 = submit urb, + * 1 = unlink urb) + * + * - direction: bit 7 + * (0 = Host-to-Device [Out] + * 1 = Device-to-Host [In]) + * + * - device address: bits 8-14 + * + * - endpoint: bits 15-18 + * + * - pipe type: bits 30-31 + * (00 = isochronous, 01 = interrupt, + * 10 = control, 11 = bulk) + */ + +#define USBIF_PIPE_PORT_MASK 0x0000001f +#define USBIF_PIPE_UNLINK 0x00000020 +#define USBIF_PIPE_DIR 0x00000080 +#define USBIF_PIPE_DEV_MASK 0x0000007f +#define USBIF_PIPE_DEV_SHIFT 8 +#define USBIF_PIPE_EP_MASK 0x0000000f +#define USBIF_PIPE_EP_SHIFT 15 +#define USBIF_PIPE_TYPE_MASK 0x00000003 +#define USBIF_PIPE_TYPE_SHIFT 30 +#define USBIF_PIPE_TYPE_ISOC 0 +#define USBIF_PIPE_TYPE_INT 1 +#define USBIF_PIPE_TYPE_CTRL 2 +#define USBIF_PIPE_TYPE_BULK 3 + +#define usbif_pipeportnum(pipe) ((pipe) & USBIF_PIPE_PORT_MASK) +#define usbif_setportnum_pipe(pipe, portnum) ((pipe) | (portnum)) + +#define usbif_pipeunlink(pipe) ((pipe) & USBIF_PIPE_UNLINK) +#define usbif_pipesubmit(pipe) (!usbif_pipeunlink(pipe)) +#define usbif_setunlink_pipe(pipe) ((pipe) | USBIF_PIPE_UNLINK) + +#define usbif_pipein(pipe) ((pipe) & USBIF_PIPE_DIR) +#define usbif_pipeout(pipe) (!usbif_pipein(pipe)) + +#define usbif_pipedevice(pipe) \ + (((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK) + +#define usbif_pipeendpoint(pipe) \ + (((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK) + +#define usbif_pipetype(pipe) \ + (((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK) +#define usbif_pipeisoc(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC) +#define usbif_pipeint(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT) +#define usbif_pipectrl(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL) +#define usbif_pipebulk(pipe) (usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK) + +#define USBIF_MAX_SEGMENTS_PER_REQUEST (16) +#define USBIF_MAX_PORTNR 31 +#define USBIF_RING_SIZE 4096 + +/* + * RING for transferring urbs. + */ +struct usbif_request_segment { + grant_ref_t gref; + uint16_t offset; + uint16_t length; +}; + +struct usbif_urb_request { + uint16_t id; /* request id */ + uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ + + /* basic urb parameter */ + uint32_t pipe; + uint16_t transfer_flags; +#define USBIF_SHORT_NOT_OK 0x0001 + uint16_t buffer_length; + union { + uint8_t ctrl[8]; /* setup_packet (Ctrl) */ + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t start_frame; /* start frame */ + uint16_t number_of_packets; /* number of ISO packet */ + uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */ + } isoc; + + struct { + uint16_t interval; /* maximum (1024*8) in usb core */ + uint16_t pad[3]; + } intr; + + struct { + uint16_t unlink_id; /* unlink request id */ + uint16_t pad[3]; + } unlink; + + } u; + + /* urb data segments */ + struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct usbif_urb_request usbif_urb_request_t; + +struct usbif_urb_response { + uint16_t id; /* request id */ + uint16_t start_frame; /* start frame (ISO) */ + int32_t status; /* status (non-ISO) */ + int32_t actual_length; /* actual transfer length */ + int32_t error_count; /* number of ISO errors */ +}; +typedef struct usbif_urb_response usbif_urb_response_t; + +DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response); +#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE) + +/* + * RING for notifying connect/disconnect events to frontend + */ +struct usbif_conn_request { + uint16_t id; +}; +typedef struct usbif_conn_request usbif_conn_request_t; + +struct usbif_conn_response { + uint16_t id; /* request id */ + uint8_t portnum; /* port number */ + uint8_t speed; /* usb_device_speed */ +#define USBIF_SPEED_NONE 0 +#define USBIF_SPEED_LOW 1 +#define USBIF_SPEED_FULL 2 +#define USBIF_SPEED_HIGH 3 +}; +typedef struct usbif_conn_response usbif_conn_response_t; + +DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response); +#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE) + +#endif /* __XEN_PUBLIC_IO_USBIF_H__ */ diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h new file mode 100644 index 0000000000..2fbf2a7fdc --- /dev/null +++ b/include/hw/xen/interface/io/xenbus.h @@ -0,0 +1,70 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h index 3183f10e3c..1c2d9dfdb8 100644 --- a/include/hw/xen/xen-bus.h +++ b/include/hw/xen/xen-bus.h @@ -15,6 +15,7 @@ typedef void (*XenWatchHandler)(void *opaque); typedef struct XenWatch XenWatch; +typedef struct XenEventChannel XenEventChannel; typedef struct XenDevice { DeviceState qdev; @@ -28,8 +29,7 @@ typedef struct XenDevice { XenWatch *backend_online_watch; xengnttab_handle *xgth; bool feature_grant_copy; - xenevtchn_handle *xeh; - NotifierList event_notifiers; + QLIST_HEAD(, XenEventChannel) event_channels; } XenDevice; typedef char *(*XenDeviceGetName)(XenDevice *xendev, Error **errp); @@ -119,11 +119,10 @@ void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, XenDeviceGrantCopySegment segs[], unsigned int nr_segs, Error **errp); -typedef struct XenEventChannel XenEventChannel; - -typedef void (*XenEventHandler)(void *opaque); +typedef bool (*XenEventHandler)(void *opaque); XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, + AioContext *ctx, unsigned int port, XenEventHandler handler, void *opaque, Error **errp); diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index 9a8155e172..0504b43659 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -12,7 +12,7 @@ #include <xenctrl.h> #include <xenstore.h> -#include <xen/io/xenbus.h> +#include "hw/xen/interface/io/xenbus.h" #include "hw/hw.h" #include "hw/xen/xen.h" diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker index dd021f2df0..0a4970c068 100644 --- a/tests/docker/dockerfiles/debian-win32-cross.docker +++ b/tests/docker/dockerfiles/debian-win32-cross.docker @@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \ mxe-$TARGET-w64-mingw32.shared-curl \ mxe-$TARGET-w64-mingw32.shared-glib \ mxe-$TARGET-w64-mingw32.shared-libgcrypt \ - mxe-$TARGET-w64-mingw32.shared-libssh2 \ mxe-$TARGET-w64-mingw32.shared-libusb1 \ mxe-$TARGET-w64-mingw32.shared-lzo \ mxe-$TARGET-w64-mingw32.shared-nettle \ diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker index 4542bcc821..b27985b1b1 100644 --- a/tests/docker/dockerfiles/debian-win64-cross.docker +++ b/tests/docker/dockerfiles/debian-win64-cross.docker @@ -15,7 +15,6 @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \ mxe-$TARGET-w64-mingw32.shared-curl \ mxe-$TARGET-w64-mingw32.shared-glib \ mxe-$TARGET-w64-mingw32.shared-libgcrypt \ - mxe-$TARGET-w64-mingw32.shared-libssh2 \ mxe-$TARGET-w64-mingw32.shared-libusb1 \ mxe-$TARGET-w64-mingw32.shared-lzo \ mxe-$TARGET-w64-mingw32.shared-nettle \ diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index 12c460597e..619d1b5656 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -35,7 +35,7 @@ ENV PACKAGES \ libpng-devel \ librbd-devel \ libseccomp-devel \ - libssh2-devel \ + libssh-devel \ libubsan \ libusbx-devel \ libxml2-devel \ @@ -50,7 +50,6 @@ ENV PACKAGES \ mingw32-gtk3 \ mingw32-libjpeg-turbo \ mingw32-libpng \ - mingw32-libssh2 \ mingw32-libtasn1 \ mingw32-nettle \ mingw32-pixman \ @@ -64,7 +63,6 @@ ENV PACKAGES \ mingw64-gtk3 \ mingw64-libjpeg-turbo \ mingw64-libpng \ - mingw64-libssh2 \ mingw64-libtasn1 \ mingw64-nettle \ mingw64-pixman \ diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker index 8d256961f0..d3b72209c8 100644 --- a/tests/docker/dockerfiles/ubuntu.docker +++ b/tests/docker/dockerfiles/ubuntu.docker @@ -53,7 +53,7 @@ ENV PACKAGES flex bison \ libsnappy-dev \ libspice-protocol-dev \ libspice-server-dev \ - libssh2-1-dev \ + libssh-dev \ libusb-1.0-0-dev \ libusbredirhost-dev \ libvdeplug-dev \ diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker index 2e2900150b..9d80b11500 100644 --- a/tests/docker/dockerfiles/ubuntu1804.docker +++ b/tests/docker/dockerfiles/ubuntu1804.docker @@ -40,7 +40,7 @@ ENV PACKAGES flex bison \ libsnappy-dev \ libspice-protocol-dev \ libspice-server-dev \ - libssh2-1-dev \ + libssh-dev \ libusb-1.0-0-dev \ libusbredirhost-dev \ libvdeplug-dev \ diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index f51394ae8e..4fab42a28c 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -2358,5 +2358,5 @@ Offset Length Mapped to File 0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk === Testing afl image with a very large capacity === -qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large +qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big *** done diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134 index 141a2eaa7e..5f0fb86211 100755 --- a/tests/qemu-iotests/134 +++ b/tests/qemu-iotests/134 @@ -57,6 +57,15 @@ echo "== reading whole image ==" $QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir echo +echo "== rewriting cluster part ==" +$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern ==" +$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir +$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir + +echo echo "== rewriting whole image ==" $QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out index 972be49d91..09d46f6b17 100644 --- a/tests/qemu-iotests/134.out +++ b/tests/qemu-iotests/134.out @@ -5,6 +5,16 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt. read 134217728/134217728 bytes at offset 0 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== rewriting cluster part == +wrote 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + == rewriting whole image == wrote 134217728/134217728 bytes at offset 0 128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205 index 69f2c1d392..b8a86c446e 100755 --- a/tests/qemu-iotests/205 +++ b/tests/qemu-iotests/205 @@ -24,7 +24,7 @@ import iotests import time from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive -nbd_sock = 'nbd_sock' +nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock') nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock disk = os.path.join(iotests.test_dir, 'disk') diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207 index b3816136f7..ec8c1d06f0 100755 --- a/tests/qemu-iotests/207 +++ b/tests/qemu-iotests/207 @@ -110,12 +110,49 @@ with iotests.FilePath('t.img') as disk_path, \ iotests.img_info_log(remote_path) - md5_key = subprocess.check_output( - 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' + - 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1', - shell=True).rstrip().decode('ascii') + keys = subprocess.check_output( + 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' + + 'cut -d" " -f3', + shell=True).rstrip().decode('ascii').split('\n') + + # Mappings of base64 representations to digests + md5_keys = {} + sha1_keys = {} + + for key in keys: + md5_keys[key] = subprocess.check_output( + 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key, + shell=True).rstrip().decode('ascii') + + sha1_keys[key] = subprocess.check_output( + 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key, + shell=True).rstrip().decode('ascii') vm.launch() + + # Find correct key first + matching_key = None + for key in keys: + result = vm.qmp('blockdev-add', + driver='ssh', node_name='node0', path=disk_path, + server={ + 'host': '127.0.0.1', + 'port': '22', + }, host_key_check={ + 'mode': 'hash', + 'type': 'md5', + 'hash': md5_keys[key], + }) + + if 'error' not in result: + vm.qmp('blockdev-del', node_name='node0') + matching_key = key + break + + if matching_key is None: + vm.shutdown() + iotests.notrun('Did not find a key that fits 127.0.0.1') + blockdev_create(vm, { 'driver': 'ssh', 'location': { 'path': disk_path, @@ -140,7 +177,7 @@ with iotests.FilePath('t.img') as disk_path, \ 'host-key-check': { 'mode': 'hash', 'type': 'md5', - 'hash': md5_key, + 'hash': md5_keys[matching_key], } }, 'size': 8388608 }) @@ -148,11 +185,6 @@ with iotests.FilePath('t.img') as disk_path, \ iotests.img_info_log(remote_path) - sha1_key = subprocess.check_output( - 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' + - 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1', - shell=True).rstrip().decode('ascii') - vm.launch() blockdev_create(vm, { 'driver': 'ssh', 'location': { @@ -178,7 +210,7 @@ with iotests.FilePath('t.img') as disk_path, \ 'host-key-check': { 'mode': 'hash', 'type': 'sha1', - 'hash': sha1_key, + 'hash': sha1_keys[matching_key], } }, 'size': 4194304 }) diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out index ec9823793a..1239d9d648 100644 --- a/tests/qemu-iotests/207.out +++ b/tests/qemu-iotests/207.out @@ -68,7 +68,7 @@ virtual size: 4 MiB (4194304 bytes) {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}} {"return": {}} -Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31) +Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2) {"execute": "job-dismiss", "arguments": {"id": "job0"}} {"return": {}} |