diff options
73 files changed, 2054 insertions, 826 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index be79f68f46..b9a2171e9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -908,6 +908,8 @@ F: hw/acpi/* F: hw/smbios/* F: hw/i386/acpi-build.[hc] F: hw/arm/virt-acpi-build.c +F: tests/bios-tables-test.c +F: tests/acpi-utils.[hc] ppc4xx M: Alexander Graf <agraf@suse.de> @@ -1122,6 +1124,15 @@ F: hw/nvram/chrp_nvram.c F: include/hw/nvram/chrp_nvram.h F: tests/prom-env-test.c +VM Generation ID +M: Ben Warren <ben@skyportsystems.com> +S: Maintained +F: hw/acpi/vmgenid.c +F: include/hw/acpi/vmgenid.h +F: docs/specs/vmgenid.txt +F: tests/vmgenid-test.c +F: stubs/vmgenid.c + Subsystems ---------- Audio diff --git a/block/iscsi.c b/block/iscsi.c index 76319a1a6e..75d890538e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -637,6 +637,7 @@ retry: } #endif if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } #if LIBISCSI_API_VERSION < (20160603) @@ -864,6 +865,7 @@ retry: } #endif if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } #if LIBISCSI_API_VERSION < (20160603) @@ -904,6 +906,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } @@ -1237,6 +1240,7 @@ retry: 0, 0, iscsi_co_generic_cb, &iTask); } if (iTask.task == NULL) { + qemu_mutex_unlock(&iscsilun->mutex); return -ENOMEM; } diff --git a/block/rbd.c b/block/rbd.c index 22e8e69cbd..ee13f3d9d3 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -18,6 +18,7 @@ #include "block/block_int.h" #include "crypto/secret.h" #include "qemu/cutils.h" +#include "qapi/qmp/qstring.h" #include <rbd/librbd.h> @@ -102,10 +103,10 @@ typedef struct BDRVRBDState { char *snap; } BDRVRBDState; -static int qemu_rbd_next_tok(char *dst, int dst_len, - char *src, char delim, - const char *name, - char **p, Error **errp) +static char *qemu_rbd_next_tok(int max_len, + char *src, char delim, + const char *name, + char **p, Error **errp) { int l; char *end; @@ -127,17 +128,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len, } } l = strlen(src); - if (l >= dst_len) { + if (l >= max_len) { error_setg(errp, "%s too long", name); - return -EINVAL; + return NULL; } else if (l == 0) { error_setg(errp, "%s too short", name); - return -EINVAL; + return NULL; } - pstrcpy(dst, dst_len, src); - - return 0; + return src; } static void qemu_rbd_unescape(char *src) @@ -153,87 +152,134 @@ static void qemu_rbd_unescape(char *src) *p = '\0'; } -static int qemu_rbd_parsename(const char *filename, - char *pool, int pool_len, - char *snap, int snap_len, - char *name, int name_len, - char *conf, int conf_len, - Error **errp) +static void qemu_rbd_parse_filename(const char *filename, QDict *options, + Error **errp) { const char *start; - char *p, *buf; - int ret; + char *p, *buf, *keypairs; + char *found_str; + size_t max_keypair_size; + Error *local_err = NULL; if (!strstart(filename, "rbd:", &start)) { error_setg(errp, "File name must start with 'rbd:'"); - return -EINVAL; + return; } + max_keypair_size = strlen(start) + 1; buf = g_strdup(start); + keypairs = g_malloc0(max_keypair_size); p = buf; - *snap = '\0'; - *conf = '\0'; - ret = qemu_rbd_next_tok(pool, pool_len, p, - '/', "pool name", &p, errp); - if (ret < 0 || !p) { - ret = -EINVAL; + found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p, + '/', "pool name", &p, &local_err); + if (local_err) { + goto done; + } + if (!p) { + error_setg(errp, "Pool name is required"); goto done; } - qemu_rbd_unescape(pool); + qemu_rbd_unescape(found_str); + qdict_put(options, "pool", qstring_from_str(found_str)); if (strchr(p, '@')) { - ret = qemu_rbd_next_tok(name, name_len, p, - '@', "object name", &p, errp); - if (ret < 0) { + found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, + '@', "object name", &p, &local_err); + if (local_err) { goto done; } - ret = qemu_rbd_next_tok(snap, snap_len, p, - ':', "snap name", &p, errp); - qemu_rbd_unescape(snap); + qemu_rbd_unescape(found_str); + qdict_put(options, "image", qstring_from_str(found_str)); + + found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p, + ':', "snap name", &p, &local_err); + if (local_err) { + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put(options, "snapshot", qstring_from_str(found_str)); } else { - ret = qemu_rbd_next_tok(name, name_len, p, - ':', "object name", &p, errp); + found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, + ':', "object name", &p, &local_err); + if (local_err) { + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put(options, "image", qstring_from_str(found_str)); } - qemu_rbd_unescape(name); - if (ret < 0 || !p) { + if (!p) { goto done; } - ret = qemu_rbd_next_tok(conf, conf_len, p, - '\0', "configuration", &p, errp); - -done: - g_free(buf); - return ret; -} - -static char *qemu_rbd_parse_clientname(const char *conf, char *clientname) -{ - const char *p = conf; + found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '\0', "configuration", &p, &local_err); + if (local_err) { + goto done; + } - while (*p) { - int len; - const char *end = strchr(p, ':'); + p = found_str; - if (end) { - len = end - p; - } else { - len = strlen(p); + /* The following are essentially all key/value pairs, and we treat + * 'id' and 'conf' a bit special. Key/value pairs may be in any order. */ + while (p) { + char *name, *value; + name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '=', "conf option name", &p, &local_err); + if (local_err) { + break; } - if (strncmp(p, "id=", 3) == 0) { - len -= 3; - strncpy(clientname, p + 3, len); - clientname[len] = '\0'; - return clientname; + if (!p) { + error_setg(errp, "conf option %s has no value", name); + break; } - if (end == NULL) { + + qemu_rbd_unescape(name); + + value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, + ':', "conf option value", &p, &local_err); + if (local_err) { break; } - p = end + 1; + qemu_rbd_unescape(value); + + if (!strcmp(name, "conf")) { + qdict_put(options, "conf", qstring_from_str(value)); + } else if (!strcmp(name, "id")) { + qdict_put(options, "user" , qstring_from_str(value)); + } else { + /* FIXME: This is pretty ugly, and not the right way to do this. + * These should be contained in a structure, and then + * passed explicitly as individual key/value pairs to + * rados. Consider this legacy code that needs to be + * updated. */ + char *tmp = g_malloc0(max_keypair_size); + /* only use a delimiter if it is not the first keypair found */ + /* These are sets of unknown key/value pairs we'll pass along + * to ceph */ + if (keypairs[0]) { + snprintf(tmp, max_keypair_size, ":%s=%s", name, value); + pstrcat(keypairs, max_keypair_size, tmp); + } else { + snprintf(keypairs, max_keypair_size, "%s=%s", name, value); + } + g_free(tmp); + } } - return NULL; + + if (keypairs[0]) { + qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs)); + } + + +done: + if (local_err) { + error_propagate(errp, local_err); + } + g_free(buf); + g_free(keypairs); + return; } @@ -256,26 +302,24 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid, return 0; } - -static int qemu_rbd_set_conf(rados_t cluster, const char *conf, - bool only_read_conf_file, - Error **errp) +static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs, + Error **errp) { char *p, *buf; - char name[RBD_MAX_CONF_NAME_SIZE]; - char value[RBD_MAX_CONF_VAL_SIZE]; + char *name; + char *value; + Error *local_err = NULL; int ret = 0; - buf = g_strdup(conf); + buf = g_strdup(keypairs); p = buf; while (p) { - ret = qemu_rbd_next_tok(name, sizeof(name), p, - '=', "conf option name", &p, errp); - if (ret < 0) { + name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '=', "conf option name", &p, &local_err); + if (local_err) { break; } - qemu_rbd_unescape(name); if (!p) { error_setg(errp, "conf option %s has no value", name); @@ -283,36 +327,24 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, break; } - ret = qemu_rbd_next_tok(value, sizeof(value), p, - ':', "conf option value", &p, errp); - if (ret < 0) { + value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, + ':', "conf option value", &p, &local_err); + if (local_err) { break; } - qemu_rbd_unescape(value); - if (strcmp(name, "conf") == 0) { - /* read the conf file alone, so it doesn't override more - specific settings for a particular device */ - if (only_read_conf_file) { - ret = rados_conf_read_file(cluster, value); - if (ret < 0) { - error_setg_errno(errp, -ret, "error reading conf file %s", - value); - break; - } - } - } else if (strcmp(name, "id") == 0) { - /* ignore, this is parsed by qemu_rbd_parse_clientname() */ - } else if (!only_read_conf_file) { - ret = rados_conf_set(cluster, name, value); - if (ret < 0) { - error_setg_errno(errp, -ret, "invalid conf option %s", name); - ret = -EINVAL; - break; - } + ret = rados_conf_set(cluster, name, value); + if (ret < 0) { + error_setg_errno(errp, -ret, "invalid conf option %s", name); + ret = -EINVAL; + break; } } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + } g_free(buf); return ret; } @@ -328,33 +360,84 @@ static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) } } +static QemuOptsList runtime_opts = { + .name = "rbd", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "Specification of the rbd image", + }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", + }, + { + .name = "conf", + .type = QEMU_OPT_STRING, + .help = "Rados config file location", + }, + { + .name = "pool", + .type = QEMU_OPT_STRING, + .help = "Rados pool name", + }, + { + .name = "image", + .type = QEMU_OPT_STRING, + .help = "Image name in the pool", + }, + { + .name = "snapshot", + .type = QEMU_OPT_STRING, + .help = "Ceph snapshot name", + }, + { + /* maps to 'id' in rados_create() */ + .name = "user", + .type = QEMU_OPT_STRING, + .help = "Rados id name", + }, + { + .name = "keyvalue-pairs", + .type = QEMU_OPT_STRING, + .help = "Legacy rados key/value option parameters", + }, + { + .name = "host", + .type = QEMU_OPT_STRING, + }, + { + .name = "port", + .type = QEMU_OPT_STRING, + }, + { + .name = "auth", + .type = QEMU_OPT_STRING, + .help = "Supported authentication method, either cephx or none", + }, + { /* end of list */ } + }, +}; + static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) { Error *local_err = NULL; int64_t bytes = 0; int64_t objsize; int obj_order = 0; - char pool[RBD_MAX_POOL_NAME_SIZE]; - char name[RBD_MAX_IMAGE_NAME_SIZE]; - char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; - char conf[RBD_MAX_CONF_SIZE]; - char clientname_buf[RBD_MAX_CONF_SIZE]; - char *clientname; + const char *pool, *name, *conf, *clientname, *keypairs; const char *secretid; rados_t cluster; rados_ioctx_t io_ctx; - int ret; + QDict *options = NULL; + QemuOpts *rbd_opts = NULL; + int ret = 0; secretid = qemu_opt_get(opts, "password-secret"); - if (qemu_rbd_parsename(filename, pool, sizeof(pool), - snap_buf, sizeof(snap_buf), - name, sizeof(name), - conf, sizeof(conf), &local_err) < 0) { - error_propagate(errp, local_err); - return -EINVAL; - } - /* Read out options */ bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); @@ -362,35 +445,55 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) if (objsize) { if ((objsize - 1) & objsize) { /* not a power of 2? */ error_setg(errp, "obj size needs to be power of 2"); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (objsize < 4096) { error_setg(errp, "obj size too small"); - return -EINVAL; + ret = -EINVAL; + goto exit; } obj_order = ctz32(objsize); } - clientname = qemu_rbd_parse_clientname(conf, clientname_buf); + options = qdict_new(); + qemu_rbd_parse_filename(filename, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto exit; + } + + rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(rbd_opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + + pool = qemu_opt_get(rbd_opts, "pool"); + conf = qemu_opt_get(rbd_opts, "conf"); + clientname = qemu_opt_get(rbd_opts, "user"); + name = qemu_opt_get(rbd_opts, "image"); + keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs"); + ret = rados_create(&cluster, clientname); if (ret < 0) { error_setg_errno(errp, -ret, "error initializing"); - return ret; + goto exit; } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(cluster, NULL); - } else if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) { - error_propagate(errp, local_err); + /* try default location when conf=NULL, but ignore failure */ + ret = rados_conf_read_file(cluster, conf); + if (conf && ret < 0) { + error_setg_errno(errp, -ret, "error reading conf file %s", conf); ret = -EIO; goto shutdown; } - if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) { - error_propagate(errp, local_err); + ret = qemu_rbd_set_keypairs(cluster, keypairs, errp); + if (ret < 0) { ret = -EIO; goto shutdown; } @@ -421,6 +524,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) shutdown: rados_shutdown(cluster); + +exit: + QDECREF(options); + qemu_opts_del(rbd_opts); return ret; } @@ -471,38 +578,104 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) qemu_aio_unref(acb); } -/* TODO Convert to fine grained options */ -static QemuOptsList runtime_opts = { - .name = "rbd", - .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "filename", - .type = QEMU_OPT_STRING, - .help = "Specification of the rbd image", - }, - { - .name = "password-secret", - .type = QEMU_OPT_STRING, - .help = "ID of secret providing the password", - }, - { /* end of list */ } - }, -}; +#define RBD_MON_HOST 0 +#define RBD_AUTH_SUPPORTED 1 + +static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type, + Error **errp) +{ + int num_entries; + QemuOpts *opts = NULL; + QDict *sub_options; + const char *host; + const char *port; + char *str; + char *rados_str = NULL; + Error *local_err = NULL; + int i; + + assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED); + + num_entries = qdict_array_entries(options, prefix); + + if (num_entries < 0) { + error_setg(errp, "Parse error on RBD QDict array"); + return NULL; + } + + for (i = 0; i < num_entries; i++) { + char *strbuf = NULL; + const char *value; + char *rados_str_tmp; + + str = g_strdup_printf("%s%d.", prefix, i); + qdict_extract_subqdict(options, &sub_options, str); + g_free(str); + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, sub_options, &local_err); + QDECREF(sub_options); + if (local_err) { + error_propagate(errp, local_err); + g_free(rados_str); + rados_str = NULL; + goto exit; + } + + if (type == RBD_MON_HOST) { + host = qemu_opt_get(opts, "host"); + port = qemu_opt_get(opts, "port"); + + value = host; + if (port) { + /* check for ipv6 */ + if (strchr(host, ':')) { + strbuf = g_strdup_printf("[%s]:%s", host, port); + } else { + strbuf = g_strdup_printf("%s:%s", host, port); + } + value = strbuf; + } else if (strchr(host, ':')) { + strbuf = g_strdup_printf("[%s]", host); + value = strbuf; + } + } else { + value = qemu_opt_get(opts, "auth"); + } + + + /* each iteration in the for loop will build upon the string, and if + * rados_str is NULL then it is our first pass */ + if (rados_str) { + /* separate options with ';', as that is what rados_conf_set() + * requires */ + rados_str_tmp = rados_str; + rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value); + g_free(rados_str_tmp); + } else { + rados_str = g_strdup(value); + } + + g_free(strbuf); + qemu_opts_del(opts); + opts = NULL; + } + +exit: + qemu_opts_del(opts); + return rados_str; +} static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVRBDState *s = bs->opaque; - char pool[RBD_MAX_POOL_NAME_SIZE]; - char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; - char conf[RBD_MAX_CONF_SIZE]; - char clientname_buf[RBD_MAX_CONF_SIZE]; - char *clientname; + const char *pool, *snap, *conf, *clientname, *name, *keypairs; const char *secretid; QemuOpts *opts; Error *local_err = NULL; - const char *filename; + char *mon_host = NULL; + char *auth_supported = NULL; int r; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -513,41 +686,63 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - filename = qemu_opt_get(opts, "filename"); - secretid = qemu_opt_get(opts, "password-secret"); + auth_supported = qemu_rbd_array_opts(options, "auth-supported.", + RBD_AUTH_SUPPORTED, &local_err); + if (local_err) { + error_propagate(errp, local_err); + r = -EINVAL; + goto failed_opts; + } - if (qemu_rbd_parsename(filename, pool, sizeof(pool), - snap_buf, sizeof(snap_buf), - s->name, sizeof(s->name), - conf, sizeof(conf), errp) < 0) { + mon_host = qemu_rbd_array_opts(options, "server.", + RBD_MON_HOST, &local_err); + if (local_err) { + error_propagate(errp, local_err); r = -EINVAL; goto failed_opts; } - clientname = qemu_rbd_parse_clientname(conf, clientname_buf); + secretid = qemu_opt_get(opts, "password-secret"); + + pool = qemu_opt_get(opts, "pool"); + conf = qemu_opt_get(opts, "conf"); + snap = qemu_opt_get(opts, "snapshot"); + clientname = qemu_opt_get(opts, "user"); + name = qemu_opt_get(opts, "image"); + keypairs = qemu_opt_get(opts, "keyvalue-pairs"); + r = rados_create(&s->cluster, clientname); if (r < 0) { error_setg_errno(errp, -r, "error initializing"); goto failed_opts; } - s->snap = NULL; - if (snap_buf[0] != '\0') { - s->snap = g_strdup(snap_buf); + s->snap = g_strdup(snap); + if (name) { + pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name); + } + + /* try default location when conf=NULL, but ignore failure */ + r = rados_conf_read_file(s->cluster, conf); + if (conf && r < 0) { + error_setg_errno(errp, -r, "error reading conf file %s", conf); + goto failed_shutdown; + } + + r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp); + if (r < 0) { + goto failed_shutdown; } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(s->cluster, NULL); - } else if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, true, errp); + if (mon_host) { + r = rados_conf_set(s->cluster, "mon_host", mon_host); if (r < 0) { goto failed_shutdown; } } - if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, false, errp); + if (auth_supported) { + r = rados_conf_set(s->cluster, "auth_supported", auth_supported); if (r < 0) { goto failed_shutdown; } @@ -601,6 +796,8 @@ failed_shutdown: g_free(s->snap); failed_opts: qemu_opts_del(opts); + g_free(mon_host); + g_free(auth_supported); return r; } @@ -1004,18 +1201,18 @@ static QemuOptsList qemu_rbd_create_opts = { }; static BlockDriver bdrv_rbd = { - .format_name = "rbd", - .instance_size = sizeof(BDRVRBDState), - .bdrv_needs_filename = true, - .bdrv_file_open = qemu_rbd_open, - .bdrv_close = qemu_rbd_close, - .bdrv_create = qemu_rbd_create, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_get_info = qemu_rbd_getinfo, - .create_opts = &qemu_rbd_create_opts, - .bdrv_getlength = qemu_rbd_getlength, - .bdrv_truncate = qemu_rbd_truncate, - .protocol_name = "rbd", + .format_name = "rbd", + .instance_size = sizeof(BDRVRBDState), + .bdrv_parse_filename = qemu_rbd_parse_filename, + .bdrv_file_open = qemu_rbd_open, + .bdrv_close = qemu_rbd_close, + .bdrv_create = qemu_rbd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_get_info = qemu_rbd_getinfo, + .create_opts = &qemu_rbd_create_opts, + .bdrv_getlength = qemu_rbd_getlength, + .bdrv_truncate = qemu_rbd_truncate, + .protocol_name = "rbd", .bdrv_aio_readv = qemu_rbd_aio_readv, .bdrv_aio_writev = qemu_rbd_aio_writev, diff --git a/cpu-exec.c b/cpu-exec.c index 1a5ad4889d..d04dd91ebd 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -186,12 +186,6 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) cc->set_pc(cpu, last_tb->pc); } } - if (tb_exit == TB_EXIT_REQUESTED) { - /* We were asked to stop executing TBs (probably a pending - * interrupt. We've now stopped, so clear the flag. - */ - atomic_set(&cpu->tcg_exit_req, 0); - } return ret; } @@ -560,8 +554,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, qemu_mutex_unlock_iothread(); } - - if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) { + /* Finally, check if we need to exit to the main loop. */ + if (unlikely(atomic_read(&cpu->exit_request) + || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) { atomic_set(&cpu->exit_request, 0); cpu->exception_index = EXCP_INTERRUPT; return true; @@ -571,62 +566,54 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, - TranslationBlock **last_tb, int *tb_exit, - SyncClocks *sc) + TranslationBlock **last_tb, int *tb_exit) { uintptr_t ret; - - if (unlikely(atomic_read(&cpu->exit_request))) { - return; - } + int32_t insns_left; trace_exec_tb(tb, tb->pc); ret = cpu_tb_exec(cpu, tb); tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); *tb_exit = ret & TB_EXIT_MASK; - switch (*tb_exit) { - case TB_EXIT_REQUESTED: + if (*tb_exit != TB_EXIT_REQUESTED) { + *last_tb = tb; + return; + } + + *last_tb = NULL; + insns_left = atomic_read(&cpu->icount_decr.u32); + atomic_set(&cpu->icount_decr.u16.high, 0); + if (insns_left < 0) { /* Something asked us to stop executing chained TBs; just * continue round the main loop. Whatever requested the exit - * will also have set something else (eg interrupt_request) - * which we will handle next time around the loop. But we - * need to ensure the tcg_exit_req read in generated code - * comes before the next read of cpu->exit_request or - * cpu->interrupt_request. + * will also have set something else (eg exit_request or + * interrupt_request) which we will handle next time around + * the loop. But we need to ensure the zeroing of icount_decr + * comes before the next read of cpu->exit_request + * or cpu->interrupt_request. */ smp_mb(); - *last_tb = NULL; - break; - case TB_EXIT_ICOUNT_EXPIRED: - { - /* Instruction counter expired. */ -#ifdef CONFIG_USER_ONLY - abort(); -#else - int insns_left = cpu->icount_decr.u32; - *last_tb = NULL; - if (cpu->icount_extra && insns_left >= 0) { - /* Refill decrementer and continue execution. */ - cpu->icount_extra += insns_left; - insns_left = MIN(0xffff, cpu->icount_extra); - cpu->icount_extra -= insns_left; - cpu->icount_decr.u16.low = insns_left; - } else { - if (insns_left > 0) { - /* Execute remaining instructions. */ - cpu_exec_nocache(cpu, insns_left, tb, false); - align_clocks(sc, cpu); - } - cpu->exception_index = EXCP_INTERRUPT; - cpu_loop_exit(cpu); - } - break; -#endif + return; } - default: - *last_tb = tb; - break; + + /* Instruction counter expired. */ + assert(use_icount); +#ifndef CONFIG_USER_ONLY + if (cpu->icount_extra) { + /* Refill decrementer and continue execution. */ + cpu->icount_extra += insns_left; + insns_left = MIN(0xffff, cpu->icount_extra); + cpu->icount_extra -= insns_left; + cpu->icount_decr.u16.low = insns_left; + } else { + /* Execute any remaining instructions, then let the main loop + * handle the next event. + */ + if (insns_left > 0) { + cpu_exec_nocache(cpu, insns_left, tb, false); + } } +#endif } /* main execution loop */ @@ -635,7 +622,7 @@ int cpu_exec(CPUState *cpu) { CPUClass *cc = CPU_GET_CLASS(cpu); int ret; - SyncClocks sc; + SyncClocks sc = { 0 }; /* replay_interrupt may need current_cpu */ current_cpu = cpu; @@ -683,7 +670,7 @@ int cpu_exec(CPUState *cpu) while (!cpu_handle_interrupt(cpu, &last_tb)) { TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit); - cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); + cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit); /* Try to align the host and virtual clocks if the guest is in advance */ align_clocks(&sc, cpu); @@ -51,10 +51,6 @@ #include "hw/nmi.h" #include "sysemu/replay.h" -#ifndef _WIN32 -#include "qemu/compatfd.h" -#endif - #ifdef CONFIG_LINUX #include <sys/prctl.h> @@ -924,13 +920,23 @@ static void sigbus_reraise(void) abort(); } -static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, - void *ctx) +static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) { - if (kvm_on_sigbus(siginfo->ssi_code, - (void *)(intptr_t)siginfo->ssi_addr)) { + if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { sigbus_reraise(); } + + if (current_cpu) { + /* Called asynchronously in VCPU thread. */ + if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } else { + /* Called synchronously (via signalfd) in main thread. */ + if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { + sigbus_reraise(); + } + } } static void qemu_init_sigbus(void) @@ -939,92 +945,17 @@ static void qemu_init_sigbus(void) memset(&action, 0, sizeof(action)); action.sa_flags = SA_SIGINFO; - action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + action.sa_sigaction = sigbus_handler; sigaction(SIGBUS, &action, NULL); prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); } - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ - struct timespec ts = { 0, 0 }; - siginfo_t siginfo; - sigset_t waitset; - sigset_t chkset; - int r; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - sigaddset(&waitset, SIGBUS); - - do { - r = sigtimedwait(&waitset, &siginfo, &ts); - if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { - perror("sigtimedwait"); - exit(1); - } - - switch (r) { - case SIGBUS: - if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) { - sigbus_reraise(); - } - break; - default: - break; - } - - r = sigpending(&chkset); - if (r == -1) { - perror("sigpending"); - exit(1); - } - } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); -} - #else /* !CONFIG_LINUX */ - static void qemu_init_sigbus(void) { } - -static void qemu_kvm_eat_signals(CPUState *cpu) -{ -} #endif /* !CONFIG_LINUX */ -#ifndef _WIN32 -static void dummy_signal(int sig) -{ -} - -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - int r; - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); - sigdelset(&set, SIGBUS); - r = kvm_set_signal_mask(cpu, &set); - if (r) { - fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); - exit(1); - } -} - -#else /* _WIN32 */ -static void qemu_kvm_init_cpu_signals(CPUState *cpu) -{ - abort(); -} -#endif /* _WIN32 */ - static QemuMutex qemu_global_mutex; static QemuThread io_thread; @@ -1099,7 +1030,6 @@ static void qemu_kvm_wait_io_event(CPUState *cpu) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - qemu_kvm_eat_signals(cpu); qemu_wait_io_event_common(cpu); } @@ -1122,7 +1052,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) exit(1); } - qemu_kvm_init_cpu_signals(cpu); + kvm_init_cpu_signals(cpu); /* signal CPU creation */ cpu->created = true; diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 48b07a4c91..029e95202a 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -59,3 +59,4 @@ CONFIG_I82801B11=y CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y +CONFIG_ACPI_VMGENID=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index fd96345f3c..d1d7432f74 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -59,3 +59,4 @@ CONFIG_I82801B11=y CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y +CONFIG_ACPI_VMGENID=y diff --git a/docs/specs/vmgenid.txt b/docs/specs/vmgenid.txt new file mode 100644 index 0000000000..aa9f518676 --- /dev/null +++ b/docs/specs/vmgenid.txt @@ -0,0 +1,245 @@ +VIRTUAL MACHINE GENERATION ID +============================= + +Copyright (C) 2016 Red Hat, Inc. +Copyright (C) 2017 Skyport Systems, Inc. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +=== + +The VM generation ID (vmgenid) device is an emulated device which +exposes a 128-bit, cryptographically random, integer value identifier, +referred to as a Globally Unique Identifier, or GUID. + +This allows management applications (e.g. libvirt) to notify the guest +operating system when the virtual machine is executed with a different +configuration (e.g. snapshot execution or creation from a template). The +guest operating system notices the change, and is then able to react as +appropriate by marking its copies of distributed databases as dirty, +re-initializing its random number generator etc. + + +Requirements +------------ + +These requirements are extracted from the "How to implement virtual machine +generation ID support in a virtualization platform" section of the +specification, dated August 1, 2012. + + +The document may be found on the web at: + http://go.microsoft.com/fwlink/?LinkId=260709 + +R1a. The generation ID shall live in an 8-byte aligned buffer. + +R1b. The buffer holding the generation ID shall be in guest RAM, ROM, or device + MMIO range. + +R1c. The buffer holding the generation ID shall be kept separate from areas + used by the operating system. + +R1d. The buffer shall not be covered by an AddressRangeMemory or + AddressRangeACPI entry in the E820 or UEFI memory map. + +R1e. The generation ID shall not live in a page frame that could be mapped with + caching disabled. (In other words, regardless of whether the generation ID + lives in RAM, ROM or MMIO, it shall only be mapped as cacheable.) + +R2 to R5. [These AML requirements are isolated well enough in the Microsoft + specification for us to simply refer to them here.] + +R6. The hypervisor shall expose a _HID (hardware identifier) object in the + VMGenId device's scope that is unique to the hypervisor vendor. + + +QEMU Implementation +------------------- + +The above-mentioned specification does not dictate which ACPI descriptor table +will contain the VM Generation ID device. Other implementations (Hyper-V and +Xen) put it in the main descriptor table (Differentiated System Description +Table or DSDT). For ease of debugging and implementation, we have decided to +put it in its own Secondary System Description Table, or SSDT. + +The following is a dump of the contents from a running system: + +# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT + +Intel ACPI Component Architecture +ASL+ Optimizing Compiler version 20150717-64 +Copyright (c) 2000 - 2015 Intel Corporation + +Reading ACPI table from file /sys/firmware/acpi/tables/SSDT - Length +00000198 (0x0000C6) +ACPI: SSDT 0x0000000000000000 0000C6 (v01 BOCHS VMGENID 00000001 BXPC +00000001) +Acpi table [SSDT] successfully installed and loaded +Pass 1 parse of [SSDT] +Pass 2 parse of [SSDT] +Parsing Deferred Opcodes (Methods/Buffers/Packages/Regions) + +Parsing completed +Disassembly completed +ASL Output: ./SSDT.dsl - 1631 bytes +# cat SSDT.dsl +/* + * Intel ACPI Component Architecture + * AML/ASL+ Disassembler version 20150717-64 + * Copyright (c) 2000 - 2015 Intel Corporation + * + * Disassembling to symbolic ASL+ operators + * + * Disassembly of /sys/firmware/acpi/tables/SSDT, Sun Feb 5 00:19:37 2017 + * + * Original Table Header: + * Signature "SSDT" + * Length 0x000000CA (202) + * Revision 0x01 + * Checksum 0x4B + * OEM ID "BOCHS " + * OEM Table ID "VMGENID" + * OEM Revision 0x00000001 (1) + * Compiler ID "BXPC" + * Compiler Version 0x00000001 (1) + */ +DefinitionBlock ("/sys/firmware/acpi/tables/SSDT.aml", "SSDT", 1, "BOCHS ", +"VMGENID", 0x00000001) +{ + Name (VGIA, 0x07FFF000) + Scope (\_SB) + { + Device (VGEN) + { + Name (_HID, "QEMUVGID") // _HID: Hardware ID + Name (_CID, "VM_Gen_Counter") // _CID: Compatible ID + Name (_DDN, "VM_Gen_Counter") // _DDN: DOS Device Name + Method (_STA, 0, NotSerialized) // _STA: Status + { + Local0 = 0x0F + If ((VGIA == Zero)) + { + Local0 = Zero + } + + Return (Local0) + } + + Method (ADDR, 0, NotSerialized) + { + Local0 = Package (0x02) {} + Index (Local0, Zero) = (VGIA + 0x28) + Index (Local0, One) = Zero + Return (Local0) + } + } + } + + Method (\_GPE._E05, 0, NotSerialized) // _Exx: Edge-Triggered GPE + { + Notify (\_SB.VGEN, 0x80) // Status Change + } +} + + +Design Details: +--------------- + +Requirements R1a through R1e dictate that the memory holding the +VM Generation ID must be allocated and owned by the guest firmware, +in this case BIOS or UEFI. However, to be useful, QEMU must be able to +change the contents of the memory at runtime, specifically when starting a +backed-up or snapshotted image. In order to do this, QEMU must know the +address that has been allocated. + +The mechanism chosen for this memory sharing is writeable fw_cfg blobs. +These are data object that are visible to both QEMU and guests, and are +addressable as sequential files. + +More information about fw_cfg can be found in "docs/specs/fw_cfg.txt" + +Two fw_cfg blobs are used in this case: + +/etc/vmgenid_guid - contains the actual VM Generation ID GUID + - read-only to the guest +/etc/vmgenid_addr - contains the address of the downloaded vmgenid blob + - writeable by the guest + + +QEMU sends the following commands to the guest at startup: + +1. Allocate memory for vmgenid_guid fw_cfg blob. +2. Write the address of vmgenid_guid into the SSDT (VGIA ACPI variable as + shown above in the iasl dump). Note that this change is not propagated + back to QEMU. +3. Write the address of vmgenid_guid back to QEMU's copy of vmgenid_addr + via the fw_cfg DMA interface. + +After step 3, QEMU is able to update the contents of vmgenid_guid at will. + +Since BIOS or UEFI does not necessarily run when we wish to change the GUID, +the value of VGIA is persisted via the VMState mechanism. + +As spelled out in the specification, any change to the GUID executes an +ACPI notification. The exact handler to use is not specified, so the vmgenid +device uses the first unused one: \_GPE._E05. + + +Endian-ness Considerations: +--------------------------- + +Although not specified in Microsoft's document, it is assumed that the +device is expected to use little-endian format. + +All GUID passed in via command line or monitor are treated as big-endian. +GUID values displayed via monitor are shown in big-endian format. + + +GUID Storage Format: +-------------------- + +In order to implement an OVMF "SDT Header Probe Suppressor", the contents of +the vmgenid_guid fw_cfg blob are not simply a 128-bit GUID. There is also +significant padding in order to align and fill a memory page, as shown in the +following diagram: + ++----------------------------------+ +| SSDT with OEM Table ID = VMGENID | ++----------------------------------+ +| ... | TOP OF PAGE +| VGIA dword object ---------------|-----> +---------------------------+ +| ... | | fw-allocated array for | +| _STA method referring to VGIA | | "etc/vmgenid_guid" | +| ... | +---------------------------+ +| ADDR method referring to VGIA | | 0: OVMF SDT Header probe | +| ... | | suppressor | ++----------------------------------+ | 36: padding for 8-byte | + | alignment | + | 40: GUID | + | 56: padding to page size | + +---------------------------+ + END OF PAGE + + +Device Usage: +------------- + +The device has one property, which may be only be set using the command line: + + guid - sets the value of the GUID. A special value "auto" instructs + QEMU to generate a new random GUID. + +For example: + + QEMU -device vmgenid,guid="324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87" + QEMU -device vmgenid,guid=auto + +The property may be queried via QMP/HMP: + + (QEMU) query-vm-generation-id + {"return": {"guid": "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"}} + +Setting of this parameter is intentionally left out from the QMP/HMP +interfaces. There are no known use cases for changing the GUID once QEMU is +running, and adding this capability would greatly increase the complexity. diff --git a/dtc b/dtc -Subproject ec02b34c05be04f249ffaaca4b666f5246877de +Subproject fa8bc7f928ac25f23532afc8beb2073efc8fb06 diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index b0f35e6829..a53f105c52 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -802,6 +802,20 @@ Show information about hotpluggable CPUs ETEXI STEXI +@item info vm-generation-id +@findex vm-generation-id +Show Virtual Machine Generation ID +ETEXI + + { + .name = "vm-generation-id", + .args_type = "", + .params = "", + .help = "Show Virtual Machine Generation ID", + .cmd = hmp_info_vm_generation_id, + }, + +STEXI @end table ETEXI @@ -2605,3 +2605,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) qapi_free_HotpluggableCPUList(saved); } + +void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict) +{ + GuidInfo *info = qmp_query_vm_generation_id(NULL); + if (info) { + monitor_printf(mon, "%s\n", info->guid); + } + qapi_free_GuidInfo(info); +} @@ -137,5 +137,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict); void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict); void hmp_info_dump(Monitor *mon, const QDict *qdict); void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); +void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict); #endif diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 6acf79860a..11c35bcb44 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -5,6 +5,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o +common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o common-obj-y += acpi_interface.o diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index b2a1e4033b..c6f2032dec 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1559,6 +1559,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables) tables->rsdp = g_array_new(false, true /* clear */, 1); tables->table_data = g_array_new(false, true /* clear */, 1); tables->tcpalog = g_array_new(false, true /* clear */, 1); + tables->vmgenid = g_array_new(false, true /* clear */, 1); tables->linker = bios_linker_loader_init(); } @@ -1568,6 +1569,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre) g_array_free(tables->rsdp, true); g_array_free(tables->table_data, true); g_array_free(tables->tcpalog, mfre); + g_array_free(tables->vmgenid, mfre); } /* Build rsdt table */ diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c index d963ebe241..046183a0f1 100644 --- a/hw/acpi/bios-linker-loader.c +++ b/hw/acpi/bios-linker-loader.c @@ -78,6 +78,21 @@ struct BiosLinkerLoaderEntry { uint32_t length; } cksum; + /* + * COMMAND_WRITE_POINTER - write the fw_cfg file (originating from + * @dest_file) at @wr_pointer.offset, by adding a pointer to + * @src_offset within the table originating from @src_file. + * 1,2,4 or 8 byte unsigned addition is used depending on + * @wr_pointer.size. + */ + struct { + char dest_file[BIOS_LINKER_LOADER_FILESZ]; + char src_file[BIOS_LINKER_LOADER_FILESZ]; + uint32_t dst_offset; + uint32_t src_offset; + uint8_t size; + } wr_pointer; + /* padding */ char pad[124]; }; @@ -85,9 +100,10 @@ struct BiosLinkerLoaderEntry { typedef struct BiosLinkerLoaderEntry BiosLinkerLoaderEntry; enum { - BIOS_LINKER_LOADER_COMMAND_ALLOCATE = 0x1, - BIOS_LINKER_LOADER_COMMAND_ADD_POINTER = 0x2, - BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3, + BIOS_LINKER_LOADER_COMMAND_ALLOCATE = 0x1, + BIOS_LINKER_LOADER_COMMAND_ADD_POINTER = 0x2, + BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3, + BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER = 0x4, }; enum { @@ -278,3 +294,47 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker, g_array_append_vals(linker->cmd_blob, &entry, sizeof entry); } + +/* + * bios_linker_loader_write_pointer: ask guest to write a pointer to the + * source file into the destination file, and write it back to QEMU via + * fw_cfg DMA. + * + * @linker: linker object instance + * @dest_file: destination file that must be written + * @dst_patched_offset: location within destination file blob to be patched + * with the pointer to @src_file, in bytes + * @dst_patched_offset_size: size of the pointer to be patched + * at @dst_patched_offset in @dest_file blob, in bytes + * @src_file: source file who's address must be taken + * @src_offset: location within source file blob to which + * @dest_file+@dst_patched_offset will point to after + * firmware's executed WRITE_POINTER command + */ +void bios_linker_loader_write_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset) +{ + BiosLinkerLoaderEntry entry; + const BiosLinkerFileEntry *source_file = + bios_linker_find_file(linker, src_file); + + assert(source_file); + assert(src_offset < source_file->blob->len); + memset(&entry, 0, sizeof entry); + strncpy(entry.wr_pointer.dest_file, dest_file, + sizeof entry.wr_pointer.dest_file - 1); + strncpy(entry.wr_pointer.src_file, src_file, + sizeof entry.wr_pointer.src_file - 1); + entry.command = cpu_to_le32(BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER); + entry.wr_pointer.dst_offset = cpu_to_le32(dst_patched_offset); + entry.wr_pointer.src_offset = cpu_to_le32(src_offset); + entry.wr_pointer.size = dst_patched_size; + assert(dst_patched_size == 1 || dst_patched_size == 2 || + dst_patched_size == 4 || dst_patched_size == 8); + + g_array_append_vals(linker->cmd_blob, &entry, sizeof entry); +} diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c new file mode 100644 index 0000000000..744f2847da --- /dev/null +++ b/hw/acpi/vmgenid.c @@ -0,0 +1,258 @@ +/* + * Virtual Machine Generation ID Device + * + * Copyright (C) 2017 Skyport Systems. + * + * Author: Ben Warren <ben@skyportsystems.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qmp-commands.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/vmgenid.h" +#include "hw/nvram/fw_cfg.h" +#include "sysemu/sysemu.h" + +void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, + BIOSLinker *linker) +{ + Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; + uint32_t vgia_offset; + QemuUUID guid_le; + + /* Fill in the GUID values. These need to be converted to little-endian + * first, since that's what the guest expects + */ + g_array_set_size(guid, VMGENID_FW_CFG_SIZE - ARRAY_SIZE(guid_le.data)); + guid_le = vms->guid; + qemu_uuid_bswap(&guid_le); + /* The GUID is written at a fixed offset into the fw_cfg file + * in order to implement the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details + */ + g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data, + ARRAY_SIZE(guid_le.data)); + + /* Put this in a separate SSDT table */ + ssdt = init_aml_allocator(); + + /* Reserve space for header */ + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); + + /* Storage for the GUID address */ + vgia_offset = table_data->len + + build_append_named_dword(ssdt->buf, "VGIA"); + scope = aml_scope("\\_SB"); + dev = aml_device("VGEN"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVGID"))); + aml_append(dev, aml_name_decl("_CID", aml_string("VM_Gen_Counter"))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VM_Gen_Counter"))); + + /* Simple status method to check that address is linked and non-zero */ + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + addr = aml_local(0); + aml_append(method, aml_store(aml_int(0xf), addr)); + if_ctx = aml_if(aml_equal(aml_name("VGIA"), aml_int(0))); + aml_append(if_ctx, aml_store(aml_int(0), addr)); + aml_append(method, if_ctx); + aml_append(method, aml_return(addr)); + aml_append(dev, method); + + /* the ADDR method returns two 32-bit words representing the lower and + * upper halves * of the physical address of the fw_cfg blob + * (holding the GUID) + */ + method = aml_method("ADDR", 0, AML_NOTSERIALIZED); + + addr = aml_local(0); + aml_append(method, aml_store(aml_package(2), addr)); + + aml_append(method, aml_store(aml_add(aml_name("VGIA"), + aml_int(VMGENID_GUID_OFFSET), NULL), + aml_index(addr, aml_int(0)))); + aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1)))); + aml_append(method, aml_return(addr)); + + aml_append(dev, method); + aml_append(scope, dev); + aml_append(ssdt, scope); + + /* attach an ACPI notify */ + method = aml_method("\\_GPE._E05", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.VGEN"), aml_int(0x80))); + aml_append(ssdt, method); + + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + + /* Allocate guest memory for the Data fw_cfg blob */ + bios_linker_loader_alloc(linker, VMGENID_GUID_FW_CFG_FILE, guid, 4096, + false /* page boundary, high memory */); + + /* Patch address of GUID fw_cfg blob into the ADDR fw_cfg blob + * so QEMU can write the GUID there. The address is expected to be + * < 4GB, but write 64 bits anyway. + * The address that is patched in is offset in order to implement + * the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details. + */ + bios_linker_loader_write_pointer(linker, + VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t), + VMGENID_GUID_FW_CFG_FILE, VMGENID_GUID_OFFSET); + + /* Patch address of GUID fw_cfg blob into the AML so OSPM can retrieve + * and read it. Note that while we provide storage for 64 bits, only + * the least-signficant 32 get patched into AML. + */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t), + VMGENID_GUID_FW_CFG_FILE, 0); + + build_header(linker, table_data, + (void *)(table_data->data + table_data->len - ssdt->buf->len), + "SSDT", ssdt->buf->len, 1, NULL, "VMGENID"); + free_aml_allocator(); +} + +void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid) +{ + /* Create a read-only fw_cfg file for GUID */ + fw_cfg_add_file(s, VMGENID_GUID_FW_CFG_FILE, guid->data, + VMGENID_FW_CFG_SIZE); + /* Create a read-write fw_cfg file for Address */ + fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL, + vms->vmgenid_addr_le, + ARRAY_SIZE(vms->vmgenid_addr_le), false); +} + +static void vmgenid_update_guest(VmGenIdState *vms) +{ + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); + uint32_t vmgenid_addr; + QemuUUID guid_le; + + if (obj) { + /* Write the GUID to guest memory */ + memcpy(&vmgenid_addr, vms->vmgenid_addr_le, sizeof(vmgenid_addr)); + vmgenid_addr = le32_to_cpu(vmgenid_addr); + /* A zero value in vmgenid_addr means that BIOS has not yet written + * the address + */ + if (vmgenid_addr) { + /* QemuUUID has the first three words as big-endian, and expect + * that any GUIDs passed in will always be BE. The guest, + * however, will expect the fields to be little-endian. + * Perform a byte swap immediately before writing. + */ + guid_le = vms->guid; + qemu_uuid_bswap(&guid_le); + /* The GUID is written at a fixed offset into the fw_cfg file + * in order to implement the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details. + */ + cpu_physical_memory_write(vmgenid_addr, guid_le.data, + sizeof(guid_le.data)); + /* Send _GPE.E05 event */ + acpi_send_event(DEVICE(obj), ACPI_VMGENID_CHANGE_STATUS); + } + } +} + +static void vmgenid_set_guid(Object *obj, const char *value, Error **errp) +{ + VmGenIdState *vms = VMGENID(obj); + + if (!strcmp(value, "auto")) { + qemu_uuid_generate(&vms->guid); + } else if (qemu_uuid_parse(value, &vms->guid) < 0) { + error_setg(errp, "'%s. %s': Failed to parse GUID string: %s", + object_get_typename(OBJECT(vms)), VMGENID_GUID, value); + return; + } + + vmgenid_update_guest(vms); +} + +/* After restoring an image, we need to update the guest memory and notify + * it of a potential change to VM Generation ID + */ +static int vmgenid_post_load(void *opaque, int version_id) +{ + VmGenIdState *vms = opaque; + vmgenid_update_guest(vms); + return 0; +} + +static const VMStateDescription vmstate_vmgenid = { + .name = "vmgenid", + .version_id = 1, + .minimum_version_id = 1, + .post_load = vmgenid_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(vmgenid_addr_le, VmGenIdState, sizeof(uint64_t)), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmgenid_handle_reset(void *opaque) +{ + VmGenIdState *vms = VMGENID(opaque); + /* Clear the guest-allocated GUID address when the VM resets */ + memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le)); +} + +static void vmgenid_realize(DeviceState *dev, Error **errp) +{ + VmGenIdState *vms = VMGENID(dev); + qemu_register_reset(vmgenid_handle_reset, vms); +} + +static void vmgenid_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vmgenid; + dc->realize = vmgenid_realize; + dc->hotpluggable = false; + + object_class_property_add_str(klass, VMGENID_GUID, NULL, + vmgenid_set_guid, NULL); + object_class_property_set_description(klass, VMGENID_GUID, + "Set Global Unique Identifier " + "(big-endian) or auto for random value", + NULL); +} + +static const TypeInfo vmgenid_device_info = { + .name = VMGENID_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VmGenIdState), + .class_init = vmgenid_device_class_init, +}; + +static void vmgenid_register_types(void) +{ + type_register_static(&vmgenid_device_info); +} + +type_init(vmgenid_register_types) + +GuidInfo *qmp_query_vm_generation_id(Error **errp) +{ + GuidInfo *info; + VmGenIdState *vms; + Object *obj = find_vmgenid_dev(); + + if (!obj) { + return NULL; + } + vms = VMGENID(obj); + + info = g_malloc0(sizeof(*info)); + info->guid = qemu_uuid_unparse_strdup(&vms->guid); + return info; +} diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index f44767b9be..2073108577 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -42,6 +42,7 @@ #include "hw/acpi/memory_hotplug.h" #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" +#include "hw/acpi/vmgenid.h" #include "sysemu/tpm_backend.h" #include "hw/timer/mc146818rtc_regs.h" #include "sysemu/numa.h" @@ -1803,7 +1804,7 @@ static Aml *build_q35_osc_method(void) Aml *else_ctx; Aml *method; Aml *a_cwd1 = aml_name("CDW1"); - Aml *a_ctrl = aml_name("CTRL"); + Aml *a_ctrl = aml_local(0); method = aml_method("_OSC", 4, AML_NOTSERIALIZED); aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); @@ -1813,7 +1814,6 @@ static Aml *build_q35_osc_method(void) aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - aml_append(if_ctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl)); /* @@ -1898,8 +1898,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(1))); - aml_append(dev, aml_name_decl("SUPP", aml_int(0))); - aml_append(dev, aml_name_decl("CTRL", aml_int(0))); aml_append(dev, build_q35_osc_method()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); @@ -1964,6 +1962,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + if (pci_bus_is_express(bus)) { + aml_append(dev, build_q35_osc_method()); + } if (numa_node != NUMA_NODE_UNASSIGNED) { aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); @@ -2610,6 +2611,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) size_t aml_len = 0; GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; + Object *vmgenid_dev; acpi_get_pm_info(&pm); acpi_get_misc_info(&misc); @@ -2653,6 +2655,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, pcms); + vmgenid_dev = find_vmgenid_dev(); + if (vmgenid_dev) { + acpi_add_table(table_offsets, tables_blob); + vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob, + tables->vmgenid, tables->linker); + } + if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); build_hpet(tables_blob, tables->linker); @@ -2823,6 +2832,7 @@ void acpi_setup(void) PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); AcpiBuildTables tables; AcpiBuildState *build_state; + Object *vmgenid_dev; if (!pcms->fw_cfg) { ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); @@ -2859,6 +2869,12 @@ void acpi_setup(void) fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); + vmgenid_dev = find_vmgenid_dev(); + if (vmgenid_dev) { + vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, + tables.vmgenid); + } + if (!pcmc->rsdp_in_ram) { /* * Keep for compatibility with old machine types. diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 5ce42af9d4..b76f3f62a0 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1153,7 +1153,7 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d) VirtIOPCIProxy *proxy = VIRTIO_PCI(d); PCIDevice *dev = &proxy->pci_dev; - return pci_get_address_space(dev); + return pci_device_iommu_address_space(dev); } static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 23483c752f..efce4b343a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -282,12 +282,17 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) caches = atomic_rcu_read(&vq->vring.caches); pa = offsetof(VRingUsed, ring[vq->vring.num]); virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); + address_space_cache_invalidate(&caches->used, pa, sizeof(val)); } void virtio_queue_set_notification(VirtQueue *vq, int enable) { vq->notification = enable; + if (!vq->vring.desc) { + return; + } + rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -1852,7 +1857,10 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) if (k->has_variable_vring_alignment) { qemu_put_be32(f, vdev->vq[i].vring.align); } - /* XXX virtio-1 devices */ + /* + * Save desc now, the rest of the ring addresses are saved in + * subsections for VIRTIO-1 devices. + */ qemu_put_be64(f, vdev->vq[i].vring.desc); qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); if (k->save_queue) { @@ -1993,14 +2001,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->vq[i].signalled_used_valid = false; vdev->vq[i].notification = true; - if (vdev->vq[i].vring.desc) { - /* XXX virtio-1 devices */ - virtio_queue_update_rings(vdev, i); - } else if (vdev->vq[i].last_avail_idx) { + if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) { error_report("VQ %d address 0x0 " "inconsistent with Host index 0x%x", i, vdev->vq[i].last_avail_idx); - return -1; + return -1; } if (k->load_queue) { ret = k->load_queue(qbus->parent, i, f); @@ -2061,6 +2066,19 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) for (i = 0; i < num; i++) { if (vdev->vq[i].vring.desc) { uint16_t nheads; + + /* + * VIRTIO-1 devices migrate desc, used, and avail ring addresses so + * only the region cache needs to be set up. Legacy devices need + * to calculate used and avail ring addresses based on the desc + * address. + */ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + virtio_init_region_cache(vdev, i); + } else { + virtio_queue_update_rings(vdev, i); + } + nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { @@ -2291,7 +2309,7 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) VirtQueue *vq = container_of(n, VirtQueue, host_notifier); bool progress; - if (virtio_queue_empty(vq)) { + if (!vq->vring.desc || virtio_queue_empty(vq)) { return false; } diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 8c305aa4fa..b62f0d82e4 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -36,6 +36,12 @@ enum device_endian { DEVICE_LITTLE_ENDIAN, }; +#if defined(HOST_WORDS_BIGENDIAN) +#define DEVICE_HOST_ENDIAN DEVICE_BIG_ENDIAN +#else +#define DEVICE_HOST_ENDIAN DEVICE_LITTLE_ENDIAN +#endif + /* address in the RAM (different from a physical address) */ #if defined(CONFIG_XEN_BACKEND) typedef uint64_t ram_addr_t; diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 050de59b38..62d462e494 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -6,58 +6,55 @@ /* Helpers for instruction counting code generation. */ static int icount_start_insn_idx; -static TCGLabel *icount_label; static TCGLabel *exitreq_label; static inline void gen_tb_start(TranslationBlock *tb) { - TCGv_i32 count, flag, imm; + TCGv_i32 count, imm; exitreq_label = gen_new_label(); - flag = tcg_temp_new_i32(); - tcg_gen_ld_i32(flag, cpu_env, - offsetof(CPUState, tcg_exit_req) - ENV_OFFSET); - tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label); - tcg_temp_free_i32(flag); - - if (!(tb->cflags & CF_USE_ICOUNT)) { - return; + if (tb->cflags & CF_USE_ICOUNT) { + count = tcg_temp_local_new_i32(); + } else { + count = tcg_temp_new_i32(); } - icount_label = gen_new_label(); - count = tcg_temp_local_new_i32(); tcg_gen_ld_i32(count, cpu_env, -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); - imm = tcg_temp_new_i32(); - /* We emit a movi with a dummy immediate argument. Keep the insn index - * of the movi so that we later (when we know the actual insn count) - * can update the immediate argument with the actual insn count. */ - icount_start_insn_idx = tcg_op_buf_count(); - tcg_gen_movi_i32(imm, 0xdeadbeef); + if (tb->cflags & CF_USE_ICOUNT) { + imm = tcg_temp_new_i32(); + /* We emit a movi with a dummy immediate argument. Keep the insn index + * of the movi so that we later (when we know the actual insn count) + * can update the immediate argument with the actual insn count. */ + icount_start_insn_idx = tcg_op_buf_count(); + tcg_gen_movi_i32(imm, 0xdeadbeef); + + tcg_gen_sub_i32(count, count, imm); + tcg_temp_free_i32(imm); + } + + tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, exitreq_label); - tcg_gen_sub_i32(count, count, imm); - tcg_temp_free_i32(imm); + if (tb->cflags & CF_USE_ICOUNT) { + tcg_gen_st16_i32(count, cpu_env, + -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); + } - tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label); - tcg_gen_st16_i32(count, cpu_env, - -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); tcg_temp_free_i32(count); } static void gen_tb_end(TranslationBlock *tb, int num_insns) { - gen_set_label(exitreq_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); - if (tb->cflags & CF_USE_ICOUNT) { /* Update the num_insn immediate parameter now that we know * the actual insn count. */ tcg_set_insn_param(icount_start_insn_idx, 1, num_insns); - gen_set_label(icount_label); - tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED); } + gen_set_label(exitreq_label); + tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_REQUESTED); + /* Terminate the linked list. */ tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0; } diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index 71d3c48e7d..3c2e4e95a5 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -11,6 +11,7 @@ typedef enum { ACPI_CPU_HOTPLUG_STATUS = 4, ACPI_MEMORY_HOTPLUG_STATUS = 8, ACPI_NVDIMM_HOTPLUG_STATUS = 16, + ACPI_VMGENID_CHANGE_STATUS = 32, } AcpiEventStatusBits; #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 559326cbd5..00c21f160c 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -210,6 +210,7 @@ struct AcpiBuildTables { GArray *table_data; GArray *rsdp; GArray *tcpalog; + GArray *vmgenid; BIOSLinker *linker; } AcpiBuildTables; diff --git a/include/hw/acpi/bios-linker-loader.h b/include/hw/acpi/bios-linker-loader.h index fa1e5d1a4e..efe17b0b9c 100644 --- a/include/hw/acpi/bios-linker-loader.h +++ b/include/hw/acpi/bios-linker-loader.h @@ -26,5 +26,12 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker, const char *src_file, uint32_t src_offset); +void bios_linker_loader_write_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset); + void bios_linker_loader_cleanup(BIOSLinker *linker); #endif diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h new file mode 100644 index 0000000000..db7fa0e633 --- /dev/null +++ b/include/hw/acpi/vmgenid.h @@ -0,0 +1,35 @@ +#ifndef ACPI_VMGENID_H +#define ACPI_VMGENID_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev.h" +#include "qemu/uuid.h" + +#define VMGENID_DEVICE "vmgenid" +#define VMGENID_GUID "guid" +#define VMGENID_GUID_FW_CFG_FILE "etc/vmgenid_guid" +#define VMGENID_ADDR_FW_CFG_FILE "etc/vmgenid_addr" + +#define VMGENID_FW_CFG_SIZE 4096 /* Occupy a page of memory */ +#define VMGENID_GUID_OFFSET 40 /* allow space for + * OVMF SDT Header Probe Supressor + */ + +#define VMGENID(obj) OBJECT_CHECK(VmGenIdState, (obj), VMGENID_DEVICE) + +typedef struct VmGenIdState { + DeviceClass parent_obj; + QemuUUID guid; /* The 128-bit GUID seen by the guest */ + uint8_t vmgenid_addr_le[8]; /* Address of the GUID (little-endian) */ +} VmGenIdState; + +static inline Object *find_vmgenid_dev(void) +{ + return object_resolve_path_type("", VMGENID_DEVICE, NULL); +} + +void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, + BIOSLinker *linker); +void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid); + +#endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index d1f45540a1..ab303c7fee 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -623,6 +623,10 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "Broadwell-noTSX" "-" TYPE_X86_CPU,\ .property = "xlevel",\ .value = stringify(0x8000000a),\ + },{\ + .driver = TYPE_X86_CPU,\ + .property = "kvm-no-smi-migration",\ + .value = "on",\ }, #define PC_COMPAT_2_2 \ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 3258eaa6bf..808aac8703 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -21,7 +21,6 @@ typedef struct sPAPREventSource sPAPREventSource; #define SPAPR_TIMEBASE_FREQ 512000000ULL typedef struct sPAPRMachineClass sPAPRMachineClass; -typedef struct sPAPRMachineState sPAPRMachineState; #define TYPE_SPAPR_MACHINE "spapr-machine" #define SPAPR_MACHINE(obj) \ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 1945913bf1..9a5e715fe5 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -139,9 +139,9 @@ struct ICSIRQState { uint8_t flags; }; -typedef struct XICSFabric { +struct XICSFabric { Object parent; -} XICSFabric; +}; #define TYPE_XICS_FABRIC "xics-fabric" #define XICS_FABRIC(obj) \ diff --git a/include/qemu/compatfd.h b/include/qemu/compatfd.h deleted file mode 100644 index aa12ee9364..0000000000 --- a/include/qemu/compatfd.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * signalfd/eventfd compatibility - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - */ - -#ifndef QEMU_COMPATFD_H -#define QEMU_COMPATFD_H - - -struct qemu_signalfd_siginfo { - uint32_t ssi_signo; /* Signal number */ - int32_t ssi_errno; /* Error number (unused) */ - int32_t ssi_code; /* Signal code */ - uint32_t ssi_pid; /* PID of sender */ - uint32_t ssi_uid; /* Real UID of sender */ - int32_t ssi_fd; /* File descriptor (SIGIO) */ - uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ - uint32_t ssi_band; /* Band event (SIGIO) */ - uint32_t ssi_overrun; /* POSIX timer overrun count */ - uint32_t ssi_trapno; /* Trap number that caused signal */ - int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ - int32_t ssi_int; /* Integer sent by sigqueue(2) */ - uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ - uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ - uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ - uint64_t ssi_addr; /* Address that generated signal - (for hardware-generated signals) */ - uint8_t pad[48]; /* Pad size to 128 bytes (allow for - additional fields in the future) */ -}; - -int qemu_signalfd(const sigset_t *mask); - -#endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 56c9e22405..af37195fef 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -284,6 +284,15 @@ void qemu_anon_ram_free(void *ptr, size_t size); #endif +#if defined(CONFIG_LINUX) +#ifndef BUS_MCEERR_AR +#define BUS_MCEERR_AR 4 +#endif +#ifndef BUS_MCEERR_AO +#define BUS_MCEERR_AO 5 +#endif +#endif + #if defined(__linux__) && \ (defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)) /* Use 2 MiB alignment so transparent hugepages can be used by KVM. @@ -297,6 +306,34 @@ void qemu_anon_ram_free(void *ptr, size_t size); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#ifdef CONFIG_POSIX +struct qemu_signalfd_siginfo { + uint32_t ssi_signo; /* Signal number */ + int32_t ssi_errno; /* Error number (unused) */ + int32_t ssi_code; /* Signal code */ + uint32_t ssi_pid; /* PID of sender */ + uint32_t ssi_uid; /* Real UID of sender */ + int32_t ssi_fd; /* File descriptor (SIGIO) */ + uint32_t ssi_tid; /* Kernel timer ID (POSIX timers) */ + uint32_t ssi_band; /* Band event (SIGIO) */ + uint32_t ssi_overrun; /* POSIX timer overrun count */ + uint32_t ssi_trapno; /* Trap number that caused signal */ + int32_t ssi_status; /* Exit status or signal (SIGCHLD) */ + int32_t ssi_int; /* Integer sent by sigqueue(2) */ + uint64_t ssi_ptr; /* Pointer sent by sigqueue(2) */ + uint64_t ssi_utime; /* User CPU time consumed (SIGCHLD) */ + uint64_t ssi_stime; /* System CPU time consumed (SIGCHLD) */ + uint64_t ssi_addr; /* Address that generated signal + (for hardware-generated signals) */ + uint8_t pad[48]; /* Pad size to 128 bytes (allow for + additional fields in the future) */ +}; + +int qemu_signalfd(const sigset_t *mask); +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info); +#endif + int qemu_madvise(void *addr, size_t len, int advice); int qemu_open(const char *name, int flags, ...); diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 3e61c880da..c3292efe1c 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -275,11 +275,11 @@ struct qemu_work_item; * @stopped: Indicates the CPU has been artificially stopped. * @unplug: Indicates a pending CPU unplug request. * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU - * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this - * CPU and return to its top level loop. * @singlestep_enabled: Flags for single-stepping. * @icount_extra: Instructions until next timer event. - * @icount_decr: Number of cycles left, with interrupt flag in high bit. + * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode. + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this + * CPU and return to its top level loop (even in non-icount mode). * This allows a single read-compare-cbranch-write sequence to test * for both decrementer underflow and exceptions. * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution @@ -382,10 +382,6 @@ struct CPUState { /* TODO Move common fields from CPUArchState here. */ int cpu_index; /* used by alpha TCG */ uint32_t halted; /* used by alpha, cris, ppc TCG */ - union { - uint32_t u32; - icount_decr_u16 u16; - } icount_decr; uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ @@ -398,7 +394,10 @@ struct CPUState { offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code size, especially for hosts without large memory offsets. */ - uint32_t tcg_exit_req; + union { + uint32_t u32; + icount_decr_u16 u16; + } icount_decr; bool hax_vcpu_dirty; struct hax_vcpu_state *hax_vcpu; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 3045ee7678..24281fc7f8 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -238,9 +238,6 @@ int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr, target_ulong len, int type); void kvm_remove_all_breakpoints(CPUState *cpu); int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap); -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset); -#endif int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_on_sigbus(int code, void *addr); @@ -357,8 +354,10 @@ bool kvm_vcpu_id_is_valid(int vcpu_id); /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned long kvm_arch_vcpu_id(CPUState *cpu); -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); -int kvm_arch_on_sigbus(int code, void *addr); +#ifdef TARGET_I386 +#define KVM_HAVE_MCE_INJECTION 1 +void kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); +#endif void kvm_arch_init_irq_routing(KVMState *s); @@ -461,6 +460,8 @@ void kvm_cpu_synchronize_state(CPUState *cpu); void kvm_cpu_synchronize_post_reset(CPUState *cpu); void kvm_cpu_synchronize_post_init(CPUState *cpu); +void kvm_init_cpu_signals(CPUState *cpu); + /** * kvm_irqchip_add_msi_route - Add MSI route for specific vector * @s: KVM state diff --git a/io/channel-websock.c b/io/channel-websock.c index e47279a1ae..8fabadea2f 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -33,11 +33,16 @@ #define QIO_CHANNEL_WEBSOCK_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" #define QIO_CHANNEL_WEBSOCK_GUID_LEN strlen(QIO_CHANNEL_WEBSOCK_GUID) -#define QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL "Sec-WebSocket-Protocol" -#define QIO_CHANNEL_WEBSOCK_HEADER_VERSION "Sec-WebSocket-Version" -#define QIO_CHANNEL_WEBSOCK_HEADER_KEY "Sec-WebSocket-Key" +#define QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL "sec-websocket-protocol" +#define QIO_CHANNEL_WEBSOCK_HEADER_VERSION "sec-websocket-version" +#define QIO_CHANNEL_WEBSOCK_HEADER_KEY "sec-websocket-key" +#define QIO_CHANNEL_WEBSOCK_HEADER_UPGRADE "upgrade" +#define QIO_CHANNEL_WEBSOCK_HEADER_HOST "host" +#define QIO_CHANNEL_WEBSOCK_HEADER_CONNECTION "connection" #define QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY "binary" +#define QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE "Upgrade" +#define QIO_CHANNEL_WEBSOCK_UPGRADE_WEBSOCKET "websocket" #define QIO_CHANNEL_WEBSOCK_HANDSHAKE_RESPONSE \ "HTTP/1.1 101 Switching Protocols\r\n" \ @@ -49,6 +54,9 @@ #define QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM "\r\n" #define QIO_CHANNEL_WEBSOCK_HANDSHAKE_END "\r\n\r\n" #define QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION "13" +#define QIO_CHANNEL_WEBSOCK_HTTP_METHOD "GET" +#define QIO_CHANNEL_WEBSOCK_HTTP_PATH "/" +#define QIO_CHANNEL_WEBSOCK_HTTP_VERSION "HTTP/1.1" /* The websockets packet header is variable length * depending on the size of the payload... */ @@ -99,6 +107,13 @@ struct QEMU_PACKED QIOChannelWebsockHeader { } u; }; +typedef struct QIOChannelWebsockHTTPHeader QIOChannelWebsockHTTPHeader; + +struct QIOChannelWebsockHTTPHeader { + char *name; + char *value; +}; + enum { QIO_CHANNEL_WEBSOCK_OPCODE_CONTINUATION = 0x0, QIO_CHANNEL_WEBSOCK_OPCODE_TEXT_FRAME = 0x1, @@ -108,25 +123,130 @@ enum { QIO_CHANNEL_WEBSOCK_OPCODE_PONG = 0xA }; -static char *qio_channel_websock_handshake_entry(const char *handshake, - size_t handshake_len, - const char *name) -{ - char *begin, *end, *ret = NULL; - char *line = g_strdup_printf("%s%s: ", - QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM, - name); - begin = g_strstr_len(handshake, handshake_len, line); - if (begin != NULL) { - begin += strlen(line); - end = g_strstr_len(begin, handshake_len - (begin - handshake), - QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); - if (end != NULL) { - ret = g_strndup(begin, end - begin); +static size_t +qio_channel_websock_extract_headers(char *buffer, + QIOChannelWebsockHTTPHeader *hdrs, + size_t nhdrsalloc, + Error **errp) +{ + char *nl, *sep, *tmp; + size_t nhdrs = 0; + + /* + * First parse the HTTP protocol greeting of format: + * + * $METHOD $PATH $VERSION + * + * e.g. + * + * GET / HTTP/1.1 + */ + + nl = strstr(buffer, QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + if (!nl) { + error_setg(errp, "Missing HTTP header delimiter"); + return 0; + } + *nl = '\0'; + + tmp = strchr(buffer, ' '); + if (!tmp) { + error_setg(errp, "Missing HTTP path delimiter"); + return 0; + } + *tmp = '\0'; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_METHOD)) { + error_setg(errp, "Unsupported HTTP method %s", buffer); + return 0; + } + + buffer = tmp + 1; + tmp = strchr(buffer, ' '); + if (!tmp) { + error_setg(errp, "Missing HTTP version delimiter"); + return 0; + } + *tmp = '\0'; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_PATH)) { + error_setg(errp, "Unexpected HTTP path %s", buffer); + return 0; + } + + buffer = tmp + 1; + + if (!g_str_equal(buffer, QIO_CHANNEL_WEBSOCK_HTTP_VERSION)) { + error_setg(errp, "Unsupported HTTP version %s", buffer); + return 0; + } + + buffer = nl + strlen(QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + + /* + * Now parse all the header fields of format + * + * $NAME: $VALUE + * + * e.g. + * + * Cache-control: no-cache + */ + do { + QIOChannelWebsockHTTPHeader *hdr; + + nl = strstr(buffer, QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + if (nl) { + *nl = '\0'; + } + + sep = strchr(buffer, ':'); + if (!sep) { + error_setg(errp, "Malformed HTTP header"); + return 0; + } + *sep = '\0'; + sep++; + while (*sep == ' ') { + sep++; + } + + if (nhdrs >= nhdrsalloc) { + error_setg(errp, "Too many HTTP headers"); + return 0; + } + + hdr = &hdrs[nhdrs++]; + hdr->name = buffer; + hdr->value = sep; + + /* Canonicalize header name for easier identification later */ + for (tmp = hdr->name; *tmp; tmp++) { + *tmp = g_ascii_tolower(*tmp); + } + + if (nl) { + buffer = nl + strlen(QIO_CHANNEL_WEBSOCK_HANDSHAKE_DELIM); + } + } while (nl != NULL); + + return nhdrs; +} + +static const char * +qio_channel_websock_find_header(QIOChannelWebsockHTTPHeader *hdrs, + size_t nhdrs, + const char *name) +{ + size_t i; + + for (i = 0; i < nhdrs; i++) { + if (g_str_equal(hdrs[i].name, name)) { + return hdrs[i].value; } } - g_free(line); - return ret; + + return NULL; } @@ -166,58 +286,90 @@ static int qio_channel_websock_handshake_send_response(QIOChannelWebsock *ioc, } static int qio_channel_websock_handshake_process(QIOChannelWebsock *ioc, - const char *line, - size_t size, + char *buffer, Error **errp) { - int ret = -1; - char *protocols = qio_channel_websock_handshake_entry( - line, size, QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL); - char *version = qio_channel_websock_handshake_entry( - line, size, QIO_CHANNEL_WEBSOCK_HEADER_VERSION); - char *key = qio_channel_websock_handshake_entry( - line, size, QIO_CHANNEL_WEBSOCK_HEADER_KEY); + QIOChannelWebsockHTTPHeader hdrs[32]; + size_t nhdrs = G_N_ELEMENTS(hdrs); + const char *protocols = NULL, *version = NULL, *key = NULL, + *host = NULL, *connection = NULL, *upgrade = NULL; + nhdrs = qio_channel_websock_extract_headers(buffer, hdrs, nhdrs, errp); + if (!nhdrs) { + return -1; + } + + protocols = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_PROTOCOL); if (!protocols) { error_setg(errp, "Missing websocket protocol header data"); - goto cleanup; + return -1; } + version = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_VERSION); if (!version) { error_setg(errp, "Missing websocket version header data"); - goto cleanup; + return -1; } + key = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_KEY); if (!key) { error_setg(errp, "Missing websocket key header data"); - goto cleanup; + return -1; + } + + host = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_HOST); + if (!host) { + error_setg(errp, "Missing websocket host header data"); + return -1; + } + + connection = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_CONNECTION); + if (!connection) { + error_setg(errp, "Missing websocket connection header data"); + return -1; + } + + upgrade = qio_channel_websock_find_header( + hdrs, nhdrs, QIO_CHANNEL_WEBSOCK_HEADER_UPGRADE); + if (!upgrade) { + error_setg(errp, "Missing websocket upgrade header data"); + return -1; } if (!g_strrstr(protocols, QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY)) { error_setg(errp, "No '%s' protocol is supported by client '%s'", QIO_CHANNEL_WEBSOCK_PROTOCOL_BINARY, protocols); - goto cleanup; + return -1; } if (!g_str_equal(version, QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION)) { error_setg(errp, "Version '%s' is not supported by client '%s'", QIO_CHANNEL_WEBSOCK_SUPPORTED_VERSION, version); - goto cleanup; + return -1; } if (strlen(key) != QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN) { error_setg(errp, "Key length '%zu' was not as expected '%d'", strlen(key), QIO_CHANNEL_WEBSOCK_CLIENT_KEY_LEN); - goto cleanup; + return -1; } - ret = qio_channel_websock_handshake_send_response(ioc, key, errp); + if (!g_strrstr(connection, QIO_CHANNEL_WEBSOCK_CONNECTION_UPGRADE)) { + error_setg(errp, "No connection upgrade requested '%s'", connection); + return -1; + } - cleanup: - g_free(protocols); - g_free(version); - g_free(key); - return ret; + if (!g_str_equal(upgrade, QIO_CHANNEL_WEBSOCK_UPGRADE_WEBSOCKET)) { + error_setg(errp, "Incorrect upgrade method '%s'", upgrade); + return -1; + } + + return qio_channel_websock_handshake_send_response(ioc, key, errp); } static int qio_channel_websock_handshake_read(QIOChannelWebsock *ioc, @@ -248,10 +400,10 @@ static int qio_channel_websock_handshake_read(QIOChannelWebsock *ioc, return 0; } } + *handshake_end = '\0'; if (qio_channel_websock_handshake_process(ioc, (char *)ioc->encinput.buffer, - ioc->encinput.offset, errp) < 0) { return -1; } @@ -570,21 +722,24 @@ static ssize_t qio_channel_websock_read_wire(QIOChannelWebsock *ioc, ioc->encinput.offset += ret; } - if (ioc->payload_remain == 0) { - ret = qio_channel_websock_decode_header(ioc, errp); + while (ioc->encinput.offset != 0) { + if (ioc->payload_remain == 0) { + ret = qio_channel_websock_decode_header(ioc, errp); + if (ret < 0) { + return ret; + } + if (ret == 0) { + ioc->io_eof = TRUE; + break; + } + } + + ret = qio_channel_websock_decode_payload(ioc, errp); if (ret < 0) { return ret; } - if (ret == 0) { - return 0; - } } - - ret = qio_channel_websock_decode_payload(ioc, errp); - if (ret < 0) { - return ret; - } - return ret; + return 1; } @@ -642,9 +797,6 @@ static gboolean qio_channel_websock_flush(QIOChannel *ioc, if (ret < 0) { goto cleanup; } - if (ret == 0) { - wioc->io_eof = TRUE; - } } cleanup: @@ -120,6 +120,7 @@ bool kvm_vm_attributes_allowed; bool kvm_direct_msi_allowed; bool kvm_ioeventfd_any_length_allowed; bool kvm_msi_use_devid; +static bool kvm_immediate_exit; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -1619,6 +1620,7 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); /* If unspecified, use the default value */ @@ -1893,6 +1895,61 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu) run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } +#ifdef KVM_HAVE_MCE_INJECTION +static __thread void *pending_sigbus_addr; +static __thread int pending_sigbus_code; +static __thread bool have_sigbus_pending; +#endif + +static void kvm_cpu_kick(CPUState *cpu) +{ + atomic_set(&cpu->kvm_run->immediate_exit, 1); +} + +static void kvm_cpu_kick_self(void) +{ + if (kvm_immediate_exit) { + kvm_cpu_kick(current_cpu); + } else { + qemu_cpu_kick_self(); + } +} + +static void kvm_eat_signals(CPUState *cpu) +{ + struct timespec ts = { 0, 0 }; + siginfo_t siginfo; + sigset_t waitset; + sigset_t chkset; + int r; + + if (kvm_immediate_exit) { + atomic_set(&cpu->kvm_run->immediate_exit, 0); + /* Write kvm_run->immediate_exit before the cpu->exit_request + * write in kvm_cpu_exec. + */ + smp_wmb(); + return; + } + + sigemptyset(&waitset); + sigaddset(&waitset, SIG_IPI); + + do { + r = sigtimedwait(&waitset, &siginfo, &ts); + if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { + perror("sigtimedwait"); + exit(1); + } + + r = sigpending(&chkset); + if (r == -1) { + perror("sigpending"); + exit(1); + } + } while (sigismember(&chkset, SIG_IPI)); +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -1901,7 +1958,7 @@ int kvm_cpu_exec(CPUState *cpu) DPRINTF("kvm_cpu_exec()\n"); if (kvm_arch_process_async_events(cpu)) { - cpu->exit_request = 0; + atomic_set(&cpu->exit_request, 0); return EXCP_HLT; } @@ -1916,23 +1973,39 @@ int kvm_cpu_exec(CPUState *cpu) } kvm_arch_pre_run(cpu, run); - if (cpu->exit_request) { + if (atomic_read(&cpu->exit_request)) { DPRINTF("interrupt exit requested\n"); /* * KVM requires us to reenter the kernel after IO exits to complete * instruction emulation. This self-signal will ensure that we * leave ASAP again. */ - qemu_cpu_kick_self(); + kvm_cpu_kick_self(); } + /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit. + * Matching barrier in kvm_eat_signals. + */ + smp_rmb(); + run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0); attrs = kvm_arch_post_run(cpu, run); +#ifdef KVM_HAVE_MCE_INJECTION + if (unlikely(have_sigbus_pending)) { + qemu_mutex_lock_iothread(); + kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, + pending_sigbus_addr); + have_sigbus_pending = false; + qemu_mutex_unlock_iothread(); + } +#endif + if (run_ret < 0) { if (run_ret == -EINTR || run_ret == -EAGAIN) { DPRINTF("io window exit\n"); + kvm_eat_signals(cpu); ret = EXCP_INTERRUPT; break; } @@ -2026,7 +2099,7 @@ int kvm_cpu_exec(CPUState *cpu) vm_stop(RUN_STATE_INTERNAL_ERROR); } - cpu->exit_request = 0; + atomic_set(&cpu->exit_request, 0); return ret; } @@ -2372,16 +2445,12 @@ void kvm_remove_all_breakpoints(CPUState *cpu) } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) +static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) { KVMState *s = kvm_state; struct kvm_signal_mask *sigmask; int r; - if (!sigset) { - return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL); - } - sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset)); sigmask->len = s->sigmask_len; @@ -2391,14 +2460,73 @@ int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) return r; } + +static void kvm_ipi_signal(int sig) +{ + if (current_cpu) { + assert(kvm_immediate_exit); + kvm_cpu_kick(current_cpu); + } +} + +void kvm_init_cpu_signals(CPUState *cpu) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = kvm_ipi_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); +#if defined KVM_HAVE_MCE_INJECTION + sigdelset(&set, SIGBUS); + pthread_sigmask(SIG_SETMASK, &set, NULL); +#endif + sigdelset(&set, SIG_IPI); + if (kvm_immediate_exit) { + r = pthread_sigmask(SIG_SETMASK, &set, NULL); + } else { + r = kvm_set_signal_mask(cpu, &set); + } + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +} + +/* Called asynchronously in VCPU thread. */ int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { - return kvm_arch_on_sigbus_vcpu(cpu, code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + if (have_sigbus_pending) { + return 1; + } + have_sigbus_pending = true; + pending_sigbus_addr = addr; + pending_sigbus_code = code; + atomic_set(&cpu->exit_request, 1); + return 0; +#else + return 1; +#endif } +/* Called synchronously (via signalfd) in main thread. */ int kvm_on_sigbus(int code, void *addr) { - return kvm_arch_on_sigbus(code, addr); +#ifdef KVM_HAVE_MCE_INJECTION + /* Action required MCE kills the process if SIGBUS is blocked. Because + * that's what happens in the I/O thread, where we handle MCE via signalfd, + * we can only get action optional here. + */ + assert(code != BUS_MCEERR_AR); + kvm_arch_on_sigbus_vcpu(first_cpu, code, addr); + return 0; +#else + return 1; +#endif } int kvm_create_device(KVMState *s, uint64_t type, bool test) diff --git a/kvm-stub.c b/kvm-stub.c index b1b6b96c96..ef0c7346af 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -95,13 +95,6 @@ void kvm_remove_all_breakpoints(CPUState *cpu) { } -#ifndef _WIN32 -int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset) -{ - abort(); -} -#endif - int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) { return 1; @@ -157,4 +150,9 @@ bool kvm_has_free_slot(MachineState *ms) { return false; } + +void kvm_init_cpu_signals(CPUState *cpu) +{ + abort(); +} #endif @@ -1182,7 +1182,7 @@ static void memory_region_ram_device_write(void *opaque, hwaddr addr, static const MemoryRegionOps ram_device_mem_ops = { .read = memory_region_ram_device_read, .write = memory_region_ram_device_write, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_HOST_ENDIAN, .valid = { .min_access_size = 1, .max_access_size = 8, @@ -2588,13 +2588,24 @@ static void mtree_print_flatview(fprintf_function p, void *f, while (n--) { mr = range->mr; - p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s\n", - int128_get64(range->addr.start), - int128_get64(range->addr.start) + MR_SIZE(range->addr.size), - mr->priority, - memory_region_type(mr), - memory_region_name(mr)); + if (range->offset_in_region) { + p(f, MTREE_INDENT TARGET_FMT_plx "-" + TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx "\n", + int128_get64(range->addr.start), + int128_get64(range->addr.start) + MR_SIZE(range->addr.size), + mr->priority, + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr), + range->offset_in_region); + } else { + p(f, MTREE_INDENT TARGET_FMT_plx "-" + TARGET_FMT_plx " (prio %d, %s): %s\n", + int128_get64(range->addr.start), + int128_get64(range->addr.start) + MR_SIZE(range->addr.size), + mr->priority, + range->readonly ? "rom" : memory_region_type(mr), + memory_region_name(mr)); + } range++; } diff --git a/pc-bios/README b/pc-bios/README index 47a913f9c7..dcead369bf 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20161019. + built from git tag qemu-slof-20170303. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex 30ce7ac384..0408723dbf 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/qapi-schema.json b/qapi-schema.json index 5eb1b8b628..6febfa7b90 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -5915,6 +5915,16 @@ 'data': [ 'pause', 'poweroff' ] } ## +# @GuestPanicInformationType: +# +# An enumeration of the guest panic information types +# +# Since: 2.9 +## +{ 'enum': 'GuestPanicInformationType', + 'data': [ 'hyper-v'] } + +## # @GuestPanicInformation: # # Information about a guest panic @@ -5922,6 +5932,8 @@ # Since: 2.9 ## {'union': 'GuestPanicInformation', + 'base': {'type': 'GuestPanicInformationType'}, + 'discriminator': 'type', 'data': { 'hyper-v': 'GuestPanicInformationHyperV' } } ## @@ -6197,3 +6209,23 @@ # ## { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] } + +## +# @GuidInfo: +# +# GUID information. +# +# @guid: the globally unique identifier +# +# Since: 2.9 +## +{ 'struct': 'GuidInfo', 'data': {'guid': 'str'} } + +## +# @query-vm-generation-id: +# +# Show Virtual Machine Generation ID +# +# Since 2.9 +## +{ 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' } diff --git a/qapi/block-core.json b/qapi/block-core.json index 5cc992fb8f..bc0ccd615c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2123,6 +2123,7 @@ # @replication: Since 2.8 # @ssh: Since 2.8 # @iscsi: Since 2.9 +# @rbd: Since 2.9 # # Since: 2.0 ## @@ -2131,7 +2132,7 @@ 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', - 'quorum', 'raw', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk', + 'quorum', 'raw', 'rbd', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } ## @@ -2677,6 +2678,63 @@ '*header-digest': 'IscsiHeaderDigest', '*timeout': 'int' } } + +## +# @RbdAuthSupport: +# +# An enumeration of RBD auth support +# +# Since: 2.9 +## +{ 'enum': 'RbdAuthSupport', + 'data': [ 'cephx', 'none' ] } + + +## +# @RbdAuthMethod: +# +# An enumeration of rados auth_supported types +# +# Since: 2.9 +## +{ 'struct': 'RbdAuthMethod', + 'data': { 'auth': 'RbdAuthSupport' } } + +## +# @BlockdevOptionsRbd: +# +# @pool: Ceph pool name. +# +# @image: Image name in the Ceph pool. +# +# @conf: #optional path to Ceph configuration file. Values +# in the configuration file will be overridden by +# options specified via QAPI. +# +# @snapshot: #optional Ceph snapshot name. +# +# @user: #optional Ceph id name. +# +# @server: #optional Monitor host address and port. This maps +# to the "mon_host" Ceph option. +# +# @auth-supported: #optional Authentication supported. +# +# @password-secret: #optional The ID of a QCryptoSecret object providing +# the password for the login. +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsRbd', + 'data': { 'pool': 'str', + 'image': 'str', + '*conf': 'str', + '*snapshot': 'str', + '*user': 'str', + '*server': ['InetSocketAddress'], + '*auth-supported': ['RbdAuthMethod'], + '*password-secret': 'str' } } + ## # @ReplicationMode: # @@ -2875,7 +2933,7 @@ 'qed': 'BlockdevOptionsGenericCOWFormat', 'quorum': 'BlockdevOptionsQuorum', 'raw': 'BlockdevOptionsRaw', -# TODO rbd: Wait for structured options + 'rbd': 'BlockdevOptionsRbd', 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options 'ssh': 'BlockdevOptionsSsh', diff --git a/qapi/event.json b/qapi/event.json index 970ff0255a..e02852cd8a 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -488,9 +488,9 @@ # # @action: action that has been taken, currently always "pause" # -# @info: optional information about a panic +# @info: #optional information about a panic (since 2.9) # -# Since: 1.5 (@info since 2.9) +# Since: 1.5 # # Example: # diff --git a/qemu-options.hx b/qemu-options.hx index c85f77d1d8..229243831b 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -144,16 +144,34 @@ STEXI @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}] @findex -numa -Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev} -and @samp{cpus} are omitted, resources are split equally. Also, note -that the -@option{numa} option doesn't allocate any of the specified -resources. That is, it just assigns existing resources to NUMA nodes. This -means that one still has to use the @option{-m}, @option{-smp} options -to allocate RAM and VCPUs respectively, and possibly @option{-object} -to specify the memory backend for the @samp{memdev} suboption. - -@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one -node uses @samp{memdev}, all of them have to use it. +Define a NUMA node and assign RAM and VCPUs to it. + +@var{firstcpu} and @var{lastcpu} are CPU indexes. Each +@samp{cpus} option represent a contiguous range of CPU indexes +(or a single VCPU if @var{lastcpu} is omitted). A non-contiguous +set of VCPUs can be represented by providing multiple @samp{cpus} +options. If @samp{cpus} is omitted on all nodes, VCPUs are automatically +split between them. + +For example, the following option assigns VCPUs 0, 1, 2 and 5 to +a NUMA node: +@example +-numa node,cpus=0-2,cpus=5 +@end example + +@samp{mem} assigns a given RAM amount to a node. @samp{memdev} +assigns RAM from a given memory backend device to a node. If +@samp{mem} and @samp{memdev} are omitted in all nodes, RAM is +split equally between them. + +@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, +if one node uses @samp{memdev}, all of them have to use it. + +Note that the -@option{numa} option doesn't allocate any of the +specified resources, it just assigns existing resources to NUMA +nodes. This means that one still has to use the @option{-m}, +@option{-smp} options to allocate RAM and VCPUs respectively. + ETEXI DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, @@ -133,7 +133,7 @@ void cpu_exit(CPUState *cpu) atomic_set(&cpu->exit_request, 1); /* Ensure cpu_exec will see the exit request after TCG has exited. */ smp_wmb(); - atomic_set(&cpu->tcg_exit_req, 1); + atomic_set(&cpu->icount_decr.u16.high, -1); } int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, diff --git a/roms/SLOF b/roms/SLOF -Subproject efd65f49929d7db775b26066d538c8120ae3db9 +Subproject 66d250ef0fd06bb88b7399b9563b5008201f2d6 diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 222025525b..d9a6db0bb7 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -27,9 +27,9 @@ MSR_IA32_VMX_VMFUNC = 0x491 class msr(object): def __init__(self): try: - self.f = open('/dev/cpu/0/msr', 'r', 0) + self.f = open('/dev/cpu/0/msr', 'rb', 0) except: - self.f = open('/dev/msr0', 'r', 0) + self.f = open('/dev/msr0', 'rb', 0) def read(self, index, default = None): import struct self.f.seek(index) @@ -49,7 +49,7 @@ class Control(object): val = m.read(nr, 0) return (val & 0xffffffff, val >> 32) def show(self): - print self.name + print(self.name) mbz, mb1 = self.read2(self.cap_msr) tmbz, tmb1 = 0, 0 if self.true_cap_msr: @@ -69,7 +69,7 @@ class Control(object): s = 'forced' elif one and zero: s = 'yes' - print ' %-40s %s' % (self.bits[bit], s) + print(' %-40s %s' % (self.bits[bit], s)) class Misc(object): def __init__(self, name, bits, msr): @@ -77,9 +77,9 @@ class Misc(object): self.bits = bits self.msr = msr def show(self): - print self.name + print(self.name) value = msr().read(self.msr, 0) - print ' Hex: 0x%x' % (value) + print(' Hex: 0x%x' % (value)) def first_bit(key): if type(key) is tuple: return key[0] @@ -94,7 +94,7 @@ class Misc(object): def fmt(x): return { True: 'yes', False: 'no' }[x] v = (value >> lo) & ((1 << (hi - lo + 1)) - 1) - print ' %-40s %s' % (self.bits[bits], fmt(v)) + print(' %-40s %s' % (self.bits[bits], fmt(v))) controls = [ Misc( @@ -170,9 +170,13 @@ controls = [ 12: 'Enable INVPCID', 13: 'Enable VM functions', 14: 'VMCS shadowing', + 15: 'Enable ENCLS exiting', 16: 'RDSEED exiting', + 17: 'Enable PML', 18: 'EPT-violation #VE', + 19: 'Conceal non-root operation from PT', 20: 'Enable XSAVES/XRSTORS', + 22: 'Mode-based execute control (XS/XU)', 25: 'TSC scaling', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, @@ -190,6 +194,8 @@ controls = [ 20: 'Save IA32_EFER', 21: 'Load IA32_EFER', 22: 'Save VMX-preemption timer value', + 23: 'Clear IA32_BNDCFGS', + 24: 'Conceal VM exits from PT', }, cap_msr = MSR_IA32_VMX_EXIT_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, @@ -205,6 +211,8 @@ controls = [ 13: 'Load IA32_PERF_GLOBAL_CTRL', 14: 'Load IA32_PAT', 15: 'Load IA32_EFER', + 16: 'Load IA32_BNDCFGS', + 17: 'Conceal VM entries from PT', }, cap_msr = MSR_IA32_VMX_ENTRY_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, @@ -223,6 +231,7 @@ controls = [ (25,27): 'MSR-load/store count recommendation', 28: 'IA32_SMM_MONITOR_CTL[2] can be set to 1', 29: 'VMWRITE to VM-exit information fields', + 30: 'Inject event with insn length=0', (32,63): 'MSEG revision identifier', }, msr = MSR_IA32_VMX_MISC_CTLS, diff --git a/spice-qemu-char.c b/spice-qemu-char.c index 6f46f46b25..4d1c76e8a4 100644 --- a/spice-qemu-char.c +++ b/spice-qemu-char.c @@ -215,7 +215,10 @@ static void char_spice_finalize(Object *obj) SpiceChardev *s = SPICE_CHARDEV(obj); vmc_unregister_interface(s); - QLIST_REMOVE(s, next); + + if (s->next.le_prev) { + QLIST_REMOVE(s, next); + } g_free((char *)s->sin.subtype); #if SPICE_SERVER_VERSION >= 0x000c02 diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index aa6050f406..224f04ba69 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -36,3 +36,4 @@ stub-obj-y += qmp_pc_dimm_device_list.o stub-obj-y += target-monitor-defs.o stub-obj-y += target-get-monitor-def.o stub-obj-y += pc_madt_cpu_entry.o +stub-obj-y += vmgenid.o diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c new file mode 100644 index 0000000000..c64eb7a16e --- /dev/null +++ b/stubs/vmgenid.c @@ -0,0 +1,9 @@ +#include "qemu/osdep.h" +#include "qmp-commands.h" +#include "qapi/qmp/qerror.h" + +GuidInfo *qmp_query_vm_generation_id(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 395e986973..45554682f2 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -560,16 +560,6 @@ int kvm_arch_process_async_events(CPUState *cs) return 0; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - /* The #ifdef protections are until 32bit headers are imported and can * be removed once both 32 and 64 bit reach feature parity. */ diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 5d571f0a4e..9e8c233501 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -84,14 +84,14 @@ typedef struct DisasInsn { ExitStatus (*trans)(DisasContext *ctx, uint32_t insn, const struct DisasInsn *f); union { - void (*f_ttt)(TCGv, TCGv, TCGv); - void (*f_weww)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32); - void (*f_dedd)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64); - void (*f_wew)(TCGv_i32, TCGv_env, TCGv_i32); - void (*f_ded)(TCGv_i64, TCGv_env, TCGv_i64); - void (*f_wed)(TCGv_i32, TCGv_env, TCGv_i64); - void (*f_dew)(TCGv_i64, TCGv_env, TCGv_i32); - }; + void (*ttt)(TCGv, TCGv, TCGv); + void (*weww)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32); + void (*dedd)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64); + void (*wew)(TCGv_i32, TCGv_env, TCGv_i32); + void (*ded)(TCGv_i64, TCGv_env, TCGv_i64); + void (*wed)(TCGv_i32, TCGv_env, TCGv_i64); + void (*dew)(TCGv_i64, TCGv_env, TCGv_i32); + } f; } DisasInsn; /* global register indexes */ @@ -1934,7 +1934,7 @@ static ExitStatus trans_log(DisasContext *ctx, uint32_t insn, } tcg_r1 = load_gpr(ctx, r1); tcg_r2 = load_gpr(ctx, r2); - ret = do_log(ctx, rt, tcg_r1, tcg_r2, cf, di->f_ttt); + ret = do_log(ctx, rt, tcg_r1, tcg_r2, cf, di->f.ttt); return nullify_end(ctx, ret); } @@ -2111,10 +2111,10 @@ static ExitStatus trans_ds(DisasContext *ctx, uint32_t insn, static const DisasInsn table_arith_log[] = { { 0x08000240u, 0xfc00ffffu, trans_nop }, /* or x,y,0 */ { 0x08000240u, 0xffe0ffe0u, trans_copy }, /* or x,0,t */ - { 0x08000000u, 0xfc000fe0u, trans_log, .f_ttt = tcg_gen_andc_tl }, - { 0x08000200u, 0xfc000fe0u, trans_log, .f_ttt = tcg_gen_and_tl }, - { 0x08000240u, 0xfc000fe0u, trans_log, .f_ttt = tcg_gen_or_tl }, - { 0x08000280u, 0xfc000fe0u, trans_log, .f_ttt = tcg_gen_xor_tl }, + { 0x08000000u, 0xfc000fe0u, trans_log, .f.ttt = tcg_gen_andc_tl }, + { 0x08000200u, 0xfc000fe0u, trans_log, .f.ttt = tcg_gen_and_tl }, + { 0x08000240u, 0xfc000fe0u, trans_log, .f.ttt = tcg_gen_or_tl }, + { 0x08000280u, 0xfc000fe0u, trans_log, .f.ttt = tcg_gen_xor_tl }, { 0x08000880u, 0xfc000fe0u, trans_cmpclr }, { 0x08000380u, 0xfc000fe0u, trans_uxor }, { 0x08000980u, 0xfc000fa0u, trans_uaddcm }, @@ -3061,7 +3061,7 @@ static ExitStatus trans_fop_wew_0c(DisasContext *ctx, uint32_t insn, { unsigned rt = extract32(insn, 0, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_wew(ctx, rt, ra, di->f_wew); + return do_fop_wew(ctx, rt, ra, di->f.wew); } static ExitStatus trans_fop_wew_0e(DisasContext *ctx, uint32_t insn, @@ -3069,7 +3069,7 @@ static ExitStatus trans_fop_wew_0e(DisasContext *ctx, uint32_t insn, { unsigned rt = assemble_rt64(insn); unsigned ra = assemble_ra64(insn); - return do_fop_wew(ctx, rt, ra, di->f_wew); + return do_fop_wew(ctx, rt, ra, di->f.wew); } static ExitStatus trans_fop_ded(DisasContext *ctx, uint32_t insn, @@ -3077,7 +3077,7 @@ static ExitStatus trans_fop_ded(DisasContext *ctx, uint32_t insn, { unsigned rt = extract32(insn, 0, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_ded(ctx, rt, ra, di->f_ded); + return do_fop_ded(ctx, rt, ra, di->f.ded); } static ExitStatus trans_fop_wed_0c(DisasContext *ctx, uint32_t insn, @@ -3085,7 +3085,7 @@ static ExitStatus trans_fop_wed_0c(DisasContext *ctx, uint32_t insn, { unsigned rt = extract32(insn, 0, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_wed(ctx, rt, ra, di->f_wed); + return do_fop_wed(ctx, rt, ra, di->f.wed); } static ExitStatus trans_fop_wed_0e(DisasContext *ctx, uint32_t insn, @@ -3093,7 +3093,7 @@ static ExitStatus trans_fop_wed_0e(DisasContext *ctx, uint32_t insn, { unsigned rt = assemble_rt64(insn); unsigned ra = extract32(insn, 21, 5); - return do_fop_wed(ctx, rt, ra, di->f_wed); + return do_fop_wed(ctx, rt, ra, di->f.wed); } static ExitStatus trans_fop_dew_0c(DisasContext *ctx, uint32_t insn, @@ -3101,7 +3101,7 @@ static ExitStatus trans_fop_dew_0c(DisasContext *ctx, uint32_t insn, { unsigned rt = extract32(insn, 0, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_dew(ctx, rt, ra, di->f_dew); + return do_fop_dew(ctx, rt, ra, di->f.dew); } static ExitStatus trans_fop_dew_0e(DisasContext *ctx, uint32_t insn, @@ -3109,7 +3109,7 @@ static ExitStatus trans_fop_dew_0e(DisasContext *ctx, uint32_t insn, { unsigned rt = extract32(insn, 0, 5); unsigned ra = assemble_ra64(insn); - return do_fop_dew(ctx, rt, ra, di->f_dew); + return do_fop_dew(ctx, rt, ra, di->f.dew); } static ExitStatus trans_fop_weww_0c(DisasContext *ctx, uint32_t insn, @@ -3118,7 +3118,7 @@ static ExitStatus trans_fop_weww_0c(DisasContext *ctx, uint32_t insn, unsigned rt = extract32(insn, 0, 5); unsigned rb = extract32(insn, 16, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_weww(ctx, rt, ra, rb, di->f_weww); + return do_fop_weww(ctx, rt, ra, rb, di->f.weww); } static ExitStatus trans_fop_weww_0e(DisasContext *ctx, uint32_t insn, @@ -3127,7 +3127,7 @@ static ExitStatus trans_fop_weww_0e(DisasContext *ctx, uint32_t insn, unsigned rt = assemble_rt64(insn); unsigned rb = assemble_rb64(insn); unsigned ra = assemble_ra64(insn); - return do_fop_weww(ctx, rt, ra, rb, di->f_weww); + return do_fop_weww(ctx, rt, ra, rb, di->f.weww); } static ExitStatus trans_fop_dedd(DisasContext *ctx, uint32_t insn, @@ -3136,7 +3136,7 @@ static ExitStatus trans_fop_dedd(DisasContext *ctx, uint32_t insn, unsigned rt = extract32(insn, 0, 5); unsigned rb = extract32(insn, 16, 5); unsigned ra = extract32(insn, 21, 5); - return do_fop_dedd(ctx, rt, ra, rb, di->f_dedd); + return do_fop_dedd(ctx, rt, ra, rb, di->f.dedd); } static void gen_fcpy_s(TCGv_i32 dst, TCGv_env unused, TCGv_i32 src) @@ -3340,13 +3340,13 @@ static ExitStatus trans_xmpyu(DisasContext *ctx, uint32_t insn, return nullify_end(ctx, NO_EXIT); } -#define FOP_DED trans_fop_ded, .f_ded -#define FOP_DEDD trans_fop_dedd, .f_dedd +#define FOP_DED trans_fop_ded, .f.ded +#define FOP_DEDD trans_fop_dedd, .f.dedd -#define FOP_WEW trans_fop_wew_0c, .f_wew -#define FOP_DEW trans_fop_dew_0c, .f_dew -#define FOP_WED trans_fop_wed_0c, .f_wed -#define FOP_WEWW trans_fop_weww_0c, .f_weww +#define FOP_WEW trans_fop_wew_0c, .f.wew +#define FOP_DEW trans_fop_dew_0c, .f.dew +#define FOP_WED trans_fop_wed_0c, .f.wed +#define FOP_WEWW trans_fop_weww_0c, .f.weww static const DisasInsn table_float_0c[] = { /* floating point class zero */ @@ -3425,10 +3425,10 @@ static const DisasInsn table_float_0c[] = { #undef FOP_DEW #undef FOP_WED #undef FOP_WEWW -#define FOP_WEW trans_fop_wew_0e, .f_wew -#define FOP_DEW trans_fop_dew_0e, .f_dew -#define FOP_WED trans_fop_wed_0e, .f_wed -#define FOP_WEWW trans_fop_weww_0e, .f_weww +#define FOP_WEW trans_fop_wew_0e, .f.wew +#define FOP_DEW trans_fop_dew_0e, .f.dew +#define FOP_WED trans_fop_wed_0e, .f.wed +#define FOP_WEWW trans_fop_weww_0e, .f.weww static const DisasInsn table_float_0e[] = { /* floating point class zero */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 89421c893b..fba92125ab 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3778,19 +3778,16 @@ static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) GuestPanicInformation *panic_info = NULL; if (env->features[FEAT_HYPERV_EDX] & HV_X64_GUEST_CRASH_MSR_AVAILABLE) { - GuestPanicInformationHyperV *panic_info_hv = - g_malloc0(sizeof(GuestPanicInformationHyperV)); panic_info = g_malloc0(sizeof(GuestPanicInformation)); - panic_info->type = GUEST_PANIC_INFORMATION_KIND_HYPER_V; - panic_info->u.hyper_v.data = panic_info_hv; + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_HYPER_V; assert(HV_X64_MSR_CRASH_PARAMS >= 5); - panic_info_hv->arg1 = env->msr_hv_crash_params[0]; - panic_info_hv->arg2 = env->msr_hv_crash_params[1]; - panic_info_hv->arg3 = env->msr_hv_crash_params[2]; - panic_info_hv->arg4 = env->msr_hv_crash_params[3]; - panic_info_hv->arg5 = env->msr_hv_crash_params[4]; + panic_info->u.hyper_v.arg1 = env->msr_hv_crash_params[0]; + panic_info->u.hyper_v.arg2 = env->msr_hv_crash_params[1]; + panic_info->u.hyper_v.arg3 = env->msr_hv_crash_params[2]; + panic_info->u.hyper_v.arg4 = env->msr_hv_crash_params[3]; + panic_info->u.hyper_v.arg5 = env->msr_hv_crash_params[4]; } return panic_info; @@ -3986,6 +3983,8 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), + DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration, + false), DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 12a39d590f..ac2ad6d443 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1255,6 +1255,9 @@ struct X86CPU { /* if true override the phys_bits value with a value read from the host */ bool host_phys_bits; + /* Stop SMI delivery for migration compatibility with old machines */ + bool kvm_no_smi_migration; + /* Number of physical address bits supported */ uint32_t phys_bits; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 27fd0505df..887a81268f 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -64,13 +64,6 @@ * 255 kvm_msr_entry structs */ #define MSR_BUF_SIZE 4096 -#ifndef BUS_MCEERR_AR -#define BUS_MCEERR_AR 4 -#endif -#ifndef BUS_MCEERR_AO -#define BUS_MCEERR_AO 5 -#endif - const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_INFO(SET_TSS_ADDR), KVM_CAP_INFO(EXT_CPUID), @@ -462,70 +455,38 @@ static void hardware_memory_error(void) exit(1); } -int kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) { X86CPU *cpu = X86_CPU(c); CPUX86State *env = &cpu->env; ram_addr_t ram_addr; hwaddr paddr; - if ((env->mcg_cap & MCG_SER_P) && addr - && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) { + /* If we get an action required MCE, it has been injected by KVM + * while the VM was running. An action optional MCE instead should + * be coming from the main thread, which qemu_init_sigbus identifies + * as the "early kill" thread. + */ + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); + + if ((env->mcg_cap & MCG_SER_P) && addr) { ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr == RAM_ADDR_INVALID || - !kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!\n"); - /* Hope we are lucky for AO MCE */ - if (code == BUS_MCEERR_AO) { - return 0; - } else { - hardware_memory_error(); - } - } - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(cpu, paddr, code); - } else { - if (code == BUS_MCEERR_AO) { - return 0; - } else if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } else { - return 1; + if (ram_addr != RAM_ADDR_INVALID && + kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { + kvm_hwpoison_page_add(ram_addr); + kvm_mce_inject(cpu, paddr, code); + return; } - } - return 0; -} -int kvm_arch_on_sigbus(int code, void *addr) -{ - X86CPU *cpu = X86_CPU(first_cpu); - - if ((cpu->env.mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { - ram_addr_t ram_addr; - hwaddr paddr; + fprintf(stderr, "Hardware memory error for memory used by " + "QEMU itself instead of guest system!\n"); + } - /* Hope we are lucky for AO MCE */ - ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr == RAM_ADDR_INVALID || - !kvm_physical_memory_addr_from_host(first_cpu->kvm_state, - addr, &paddr)) { - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!: %p\n", addr); - return 0; - } - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(X86_CPU(first_cpu), paddr, code); - } else { - if (code == BUS_MCEERR_AO) { - return 0; - } else if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } else { - return 1; - } + if (code == BUS_MCEERR_AR) { + hardware_memory_error(); } - return 0; + + /* Hope we are lucky for AO MCE */ } static int kvm_inject_mce_oldstyle(X86CPU *cpu) @@ -2531,7 +2492,12 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.smi.pending = 0; events.smi.latched_init = 0; } - events.flags |= KVM_VCPUEVENT_VALID_SMM; + /* Stop SMI delivery on old machine types to avoid a reboot + * on an inward migration of an old VM. + */ + if (!cpu->kvm_no_smi_migration) { + events.flags |= KVM_VCPUEVENT_VALID_SMM; + } } if (level >= KVM_PUT_RESET_STATE) { diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 998c3412c3..0982e874bb 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -180,18 +180,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - DPRINTF("%s\n", __func__); - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/nios2/translate.c b/target/nios2/translate.c index 2d738391ad..cfec47959d 100644 --- a/target/nios2/translate.c +++ b/target/nios2/translate.c @@ -48,14 +48,14 @@ struct { \ uint8_t op; \ union { \ - uint16_t imm16; \ - int16_t imm16s; \ - }; \ + uint16_t u; \ + int16_t s; \ + } imm16; \ uint8_t b; \ uint8_t a; \ } (instr) = { \ .op = extract32((code), 0, 6), \ - .imm16 = extract32((code), 6, 16), \ + .imm16.u = extract32((code), 6, 16), \ .b = extract32((code), 22, 5), \ .a = extract32((code), 27, 5), \ } @@ -232,7 +232,7 @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags) data = tcg_temp_new(); } - tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16s); + tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags); if (unlikely(instr.b == R_ZERO)) { @@ -249,7 +249,7 @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags) TCGv val = load_gpr(dc, instr.b); TCGv addr = tcg_temp_new(); - tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16s); + tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s); tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags); tcg_temp_free(addr); } @@ -259,7 +259,7 @@ static void br(DisasContext *dc, uint32_t code, uint32_t flags) { I_TYPE(instr, code); - gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16s & -4)); + gen_goto_tb(dc, 0, dc->pc + 4 + (instr.imm16.s & -4)); dc->is_jmp = DISAS_TB_JUMP; } @@ -271,7 +271,7 @@ static void gen_bxx(DisasContext *dc, uint32_t code, uint32_t flags) tcg_gen_brcond_tl(flags, dc->cpu_R[instr.a], dc->cpu_R[instr.b], l1); gen_goto_tb(dc, 0, dc->pc + 4); gen_set_label(l1); - gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16s & -4)); + gen_goto_tb(dc, 1, dc->pc + 4 + (instr.imm16.s & -4)); dc->is_jmp = DISAS_TB_JUMP; } @@ -284,8 +284,8 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ (op3)); \ } -gen_i_cmpxx(gen_cmpxxsi, instr.imm16s) -gen_i_cmpxx(gen_cmpxxui, instr.imm16) +gen_i_cmpxx(gen_cmpxxsi, instr.imm16.s) +gen_i_cmpxx(gen_cmpxxui, instr.imm16.u) /* Math/logic instructions */ #define gen_i_math_logic(fname, insn, resimm, op3) \ @@ -302,16 +302,16 @@ static void (fname)(DisasContext *dc, uint32_t code, uint32_t flags) \ } \ } -gen_i_math_logic(addi, addi, 1, instr.imm16s) -gen_i_math_logic(muli, muli, 0, instr.imm16s) +gen_i_math_logic(addi, addi, 1, instr.imm16.s) +gen_i_math_logic(muli, muli, 0, instr.imm16.s) -gen_i_math_logic(andi, andi, 0, instr.imm16) -gen_i_math_logic(ori, ori, 1, instr.imm16) -gen_i_math_logic(xori, xori, 1, instr.imm16) +gen_i_math_logic(andi, andi, 0, instr.imm16.u) +gen_i_math_logic(ori, ori, 1, instr.imm16.u) +gen_i_math_logic(xori, xori, 1, instr.imm16.u) -gen_i_math_logic(andhi, andi, 0, instr.imm16 << 16) -gen_i_math_logic(orhi , ori, 1, instr.imm16 << 16) -gen_i_math_logic(xorhi, xori, 1, instr.imm16 << 16) +gen_i_math_logic(andhi, andi, 0, instr.imm16.u << 16) +gen_i_math_logic(orhi , ori, 1, instr.imm16.u << 16) +gen_i_math_logic(xorhi, xori, 1, instr.imm16.u << 16) /* Prototype only, defined below */ static void handle_r_type_instr(DisasContext *dc, uint32_t code, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 1adb55cb01..9f1f132cef 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2483,16 +2483,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_arch_init_irq_routing(KVMState *s) { } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 5ec050cf89..ac47154b83 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -2140,16 +2140,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu) return true; } -int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr) -{ - return 1; -} - -int kvm_arch_on_sigbus(int code, void *addr) -{ - return 1; -} - void kvm_s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr, uint32_t io_int_parm, uint32_t io_int_word) @@ -1101,7 +1101,6 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_MASK 3 #define TB_EXIT_IDX0 0 #define TB_EXIT_IDX1 1 -#define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 #ifdef HAVE_TCG_QEMU_TB_EXEC diff --git a/tests/Makefile.include b/tests/Makefile.include index 3310c170a3..364ef1bd23 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -669,7 +669,7 @@ tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o $(libqos-obj-y) + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y) diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differindex d11567c3dc..0dccad439b 100644 --- a/tests/acpi-test-data/q35/DSDT +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge Binary files differindex 412a6e9104..8cd66c3b31 100644 --- a/tests/acpi-test-data/q35/DSDT.bridge +++ b/tests/acpi-test-data/q35/DSDT.bridge diff --git a/tests/acpi-test-data/q35/DSDT.cphp b/tests/acpi-test-data/q35/DSDT.cphp Binary files differindex 79902d0d30..3c28a17a69 100644 --- a/tests/acpi-test-data/q35/DSDT.cphp +++ b/tests/acpi-test-data/q35/DSDT.cphp diff --git a/tests/acpi-test-data/q35/DSDT.ipmibt b/tests/acpi-test-data/q35/DSDT.ipmibt Binary files differindex b658329c5b..3ceb876127 100644 --- a/tests/acpi-test-data/q35/DSDT.ipmibt +++ b/tests/acpi-test-data/q35/DSDT.ipmibt diff --git a/tests/acpi-test-data/q35/DSDT.memhp b/tests/acpi-test-data/q35/DSDT.memhp Binary files differindex e46c1fb5a2..bdbefd47a5 100644 --- a/tests/acpi-test-data/q35/DSDT.memhp +++ b/tests/acpi-test-data/q35/DSDT.memhp diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c new file mode 100644 index 0000000000..41dc1ea9b4 --- /dev/null +++ b/tests/acpi-utils.c @@ -0,0 +1,65 @@ +/* + * ACPI Utility Functions + * + * Copyright (c) 2013 Red Hat Inc. + * Copyright (c) 2017 Skyport Systems + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com>, + * Ben Warren <ben@skyportsystems.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include <glib/gstdio.h> +#include "qemu-common.h" +#include "hw/smbios/smbios.h" +#include "qemu/bitmap.h" +#include "acpi-utils.h" +#include "boot-sector.h" + +uint8_t acpi_calc_checksum(const uint8_t *data, int len) +{ + int i; + uint8_t sum = 0; + + for (i = 0; i < len; i++) { + sum += data[i]; + } + + return sum; +} + +uint32_t acpi_find_rsdp_address(void) +{ + uint32_t off; + + /* RSDP location can vary across a narrow range */ + for (off = 0xf0000; off < 0x100000; off += 0x10) { + uint8_t sig[] = "RSD PTR "; + int i; + + for (i = 0; i < sizeof sig - 1; ++i) { + sig[i] = readb(off + i); + } + + if (!memcmp(sig, "RSD PTR ", sizeof sig)) { + break; + } + } + return off; +} + +void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table) +{ + ACPI_READ_FIELD(rsdp_table->signature, addr); + ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); + + ACPI_READ_FIELD(rsdp_table->checksum, addr); + ACPI_READ_ARRAY(rsdp_table->oem_id, addr); + ACPI_READ_FIELD(rsdp_table->revision, addr); + ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr); + ACPI_READ_FIELD(rsdp_table->length, addr); +} diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h new file mode 100644 index 0000000000..9f9a2d532c --- /dev/null +++ b/tests/acpi-utils.h @@ -0,0 +1,94 @@ +/* + * Utilities for working with ACPI tables + * + * Copyright (c) 2013 Red Hat Inc. + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com>, + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TEST_ACPI_UTILS_H +#define TEST_ACPI_UTILS_H + +#include "hw/acpi/acpi-defs.h" +#include "libqtest.h" + +/* DSDT and SSDTs format */ +typedef struct { + AcpiTableHeader header; + gchar *aml; /* aml bytecode from guest */ + gsize aml_len; + gchar *aml_file; + gchar *asl; /* asl code generated from aml */ + gsize asl_len; + gchar *asl_file; + bool tmp_files_retain; /* do not delete the temp asl/aml */ +} QEMU_PACKED AcpiSdtTable; + +#define ACPI_READ_FIELD(field, addr) \ + do { \ + switch (sizeof(field)) { \ + case 1: \ + field = readb(addr); \ + break; \ + case 2: \ + field = readw(addr); \ + break; \ + case 4: \ + field = readl(addr); \ + break; \ + case 8: \ + field = readq(addr); \ + break; \ + default: \ + g_assert(false); \ + } \ + addr += sizeof(field); \ + } while (0); + +#define ACPI_READ_ARRAY_PTR(arr, length, addr) \ + do { \ + int idx; \ + for (idx = 0; idx < length; ++idx) { \ + ACPI_READ_FIELD(arr[idx], addr); \ + } \ + } while (0); + +#define ACPI_READ_ARRAY(arr, addr) \ + ACPI_READ_ARRAY_PTR(arr, sizeof(arr) / sizeof(arr[0]), addr) + +#define ACPI_READ_TABLE_HEADER(table, addr) \ + do { \ + ACPI_READ_FIELD((table)->signature, addr); \ + ACPI_READ_FIELD((table)->length, addr); \ + ACPI_READ_FIELD((table)->revision, addr); \ + ACPI_READ_FIELD((table)->checksum, addr); \ + ACPI_READ_ARRAY((table)->oem_id, addr); \ + ACPI_READ_ARRAY((table)->oem_table_id, addr); \ + ACPI_READ_FIELD((table)->oem_revision, addr); \ + ACPI_READ_ARRAY((table)->asl_compiler_id, addr); \ + ACPI_READ_FIELD((table)->asl_compiler_revision, addr); \ + } while (0); + +#define ACPI_ASSERT_CMP(actual, expected) do { \ + uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \ + char ACPI_ASSERT_CMP_str[5] = {}; \ + memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \ + g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ +} while (0) + +#define ACPI_ASSERT_CMP64(actual, expected) do { \ + uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \ + char ACPI_ASSERT_CMP_str[9] = {}; \ + memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \ + g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ +} while (0) + +uint8_t acpi_calc_checksum(const uint8_t *data, int len); +uint32_t acpi_find_rsdp_address(void); +void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table); + +#endif /* TEST_ACPI_UTILS_H */ diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index d54018da73..88dbf97853 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -13,10 +13,9 @@ #include "qemu/osdep.h" #include <glib/gstdio.h> #include "qemu-common.h" -#include "libqtest.h" -#include "hw/acpi/acpi-defs.h" #include "hw/smbios/smbios.h" #include "qemu/bitmap.h" +#include "acpi-utils.h" #include "boot-sector.h" #define MACHINE_PC "pc" @@ -24,18 +23,6 @@ #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" -/* DSDT and SSDTs format */ -typedef struct { - AcpiTableHeader header; - gchar *aml; /* aml bytecode from guest */ - gsize aml_len; - gchar *aml_file; - gchar *asl; /* asl code generated from aml */ - gsize asl_len; - gchar *asl_file; - bool tmp_files_retain; /* do not delete the temp asl/aml */ -} QEMU_PACKED AcpiSdtTable; - typedef struct { const char *machine; const char *variant; @@ -53,65 +40,6 @@ typedef struct { int required_struct_types_len; } test_data; -#define ACPI_READ_FIELD(field, addr) \ - do { \ - switch (sizeof(field)) { \ - case 1: \ - field = readb(addr); \ - break; \ - case 2: \ - field = readw(addr); \ - break; \ - case 4: \ - field = readl(addr); \ - break; \ - case 8: \ - field = readq(addr); \ - break; \ - default: \ - g_assert(false); \ - } \ - addr += sizeof(field); \ - } while (0); - -#define ACPI_READ_ARRAY_PTR(arr, length, addr) \ - do { \ - int idx; \ - for (idx = 0; idx < length; ++idx) { \ - ACPI_READ_FIELD(arr[idx], addr); \ - } \ - } while (0); - -#define ACPI_READ_ARRAY(arr, addr) \ - ACPI_READ_ARRAY_PTR(arr, sizeof(arr)/sizeof(arr[0]), addr) - -#define ACPI_READ_TABLE_HEADER(table, addr) \ - do { \ - ACPI_READ_FIELD((table)->signature, addr); \ - ACPI_READ_FIELD((table)->length, addr); \ - ACPI_READ_FIELD((table)->revision, addr); \ - ACPI_READ_FIELD((table)->checksum, addr); \ - ACPI_READ_ARRAY((table)->oem_id, addr); \ - ACPI_READ_ARRAY((table)->oem_table_id, addr); \ - ACPI_READ_FIELD((table)->oem_revision, addr); \ - ACPI_READ_ARRAY((table)->asl_compiler_id, addr); \ - ACPI_READ_FIELD((table)->asl_compiler_revision, addr); \ - } while (0); - -#define ACPI_ASSERT_CMP(actual, expected) do { \ - uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \ - char ACPI_ASSERT_CMP_str[5] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \ - g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ -} while (0) - -#define ACPI_ASSERT_CMP64(actual, expected) do { \ - uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \ - char ACPI_ASSERT_CMP_str[9] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \ - g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ -} while (0) - static char disk[] = "tests/acpi-test-disk-XXXXXX"; static const char *data_dir = "tests/acpi-test-data"; #ifdef CONFIG_IASL @@ -147,36 +75,9 @@ static void free_test_data(test_data *data) g_array_free(data->tables, true); } -static uint8_t acpi_checksum(const uint8_t *data, int len) -{ - int i; - uint8_t sum = 0; - - for (i = 0; i < len; i++) { - sum += data[i]; - } - - return sum; -} - static void test_acpi_rsdp_address(test_data *data) { - uint32_t off; - - /* OK, now find RSDP */ - for (off = 0xf0000; off < 0x100000; off += 0x10) { - uint8_t sig[] = "RSD PTR "; - int i; - - for (i = 0; i < sizeof sig - 1; ++i) { - sig[i] = readb(off + i); - } - - if (!memcmp(sig, "RSD PTR ", sizeof sig)) { - break; - } - } - + uint32_t off = acpi_find_rsdp_address(); g_assert_cmphex(off, <, 0x100000); data->rsdp_addr = off; } @@ -186,17 +87,10 @@ static void test_acpi_rsdp_table(test_data *data) AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table; uint32_t addr = data->rsdp_addr; - ACPI_READ_FIELD(rsdp_table->signature, addr); - ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); - - ACPI_READ_FIELD(rsdp_table->checksum, addr); - ACPI_READ_ARRAY(rsdp_table->oem_id, addr); - ACPI_READ_FIELD(rsdp_table->revision, addr); - ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr); - ACPI_READ_FIELD(rsdp_table->length, addr); + acpi_parse_rsdp_table(addr, rsdp_table); /* rsdp checksum is not for the whole table, but for the first 20 bytes */ - g_assert(!acpi_checksum((uint8_t *)rsdp_table, 20)); + g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20)); } static void test_acpi_rsdt_table(test_data *data) @@ -220,8 +114,9 @@ static void test_acpi_rsdt_table(test_data *data) tables = g_new0(uint32_t, tables_nr); ACPI_READ_ARRAY_PTR(tables, tables_nr, addr); - checksum = acpi_checksum((uint8_t *)rsdt_table, rsdt_table->length) + - acpi_checksum((uint8_t *)tables, tables_nr * sizeof(uint32_t)); + checksum = acpi_calc_checksum((uint8_t *)rsdt_table, rsdt_table->length) + + acpi_calc_checksum((uint8_t *)tables, + tables_nr * sizeof(uint32_t)); g_assert(!checksum); /* SSDT tables after FADT */ @@ -279,7 +174,7 @@ static void test_acpi_fadt_table(test_data *data) ACPI_READ_FIELD(fadt_table->flags, addr); ACPI_ASSERT_CMP(fadt_table->signature, "FACP"); - g_assert(!acpi_checksum((uint8_t *)fadt_table, fadt_table->length)); + g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, fadt_table->length)); } static void test_acpi_facs_table(test_data *data) @@ -308,8 +203,10 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) sdt_table->aml = g_malloc0(sdt_table->aml_len); ACPI_READ_ARRAY_PTR(sdt_table->aml, sdt_table->aml_len, addr); - checksum = acpi_checksum((uint8_t *)sdt_table, sizeof(AcpiTableHeader)) + - acpi_checksum((uint8_t *)sdt_table->aml, sdt_table->aml_len); + checksum = acpi_calc_checksum((uint8_t *)sdt_table, + sizeof(AcpiTableHeader)) + + acpi_calc_checksum((uint8_t *)sdt_table->aml, + sdt_table->aml_len); g_assert(!checksum); } @@ -608,8 +505,9 @@ static bool smbios_ep_table_ok(test_data *data) return false; } ACPI_READ_FIELD(ep_table->smbios_bcd_revision, addr); - if (acpi_checksum((uint8_t *)ep_table, sizeof *ep_table) || - acpi_checksum((uint8_t *)ep_table + 0x10, sizeof *ep_table - 0x10)) { + if (acpi_calc_checksum((uint8_t *)ep_table, sizeof *ep_table) || + acpi_calc_checksum((uint8_t *)ep_table + 0x10, + sizeof *ep_table - 0x10)) { return false; } return true; diff --git a/translate-all.c b/translate-all.c index 9bac061c9b..d42d003e67 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1930,7 +1930,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - cpu->tcg_exit_req = 1; + cpu->icount_decr.u16.high = -1; } /* diff --git a/translate-common.c b/translate-common.c index d504dd0d33..40fe5a19bb 100644 --- a/translate-common.c +++ b/translate-common.c @@ -43,14 +43,11 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - if (use_icount) { - cpu->icount_decr.u16.high = 0xffff; - if (!cpu->can_do_io - && (mask & ~old_mask) != 0) { - cpu_abort(cpu, "Raised interrupt while not in I/O function"); - } - } else { - cpu->tcg_exit_req = 1; + cpu->icount_decr.u16.high = -1; + if (use_icount && + !cpu->can_do_io + && (mask & ~old_mask) != 0) { + cpu_abort(cpu, "Raised interrupt while not in I/O function"); } } } diff --git a/util/compatfd.c b/util/compatfd.c index 9a43042ae6..980bd33e52 100644 --- a/util/compatfd.c +++ b/util/compatfd.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "qemu/compatfd.h" #include "qemu/thread.h" #include <sys/syscall.h> diff --git a/util/main-loop.c b/util/main-loop.c index ad10bca211..ca7bb072f9 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -34,8 +34,6 @@ #ifndef _WIN32 -#include "qemu/compatfd.h" - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. @@ -63,8 +61,7 @@ static void sigfd_handler(void *opaque) sigaction(info.ssi_signo, NULL, &action); if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { - action.sa_sigaction(info.ssi_signo, - (siginfo_t *)&info, NULL); + sigaction_invoke(&action, &info); } else if (action.sa_handler) { action.sa_handler(info.ssi_signo); } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f63146407f..cd686aae3d 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -603,3 +603,36 @@ void qemu_free_stack(void *stack, size_t sz) munmap(stack, sz); } + +void sigaction_invoke(struct sigaction *action, + struct qemu_signalfd_siginfo *info) +{ + siginfo_t si = { 0 }; + si.si_signo = info->ssi_signo; + si.si_errno = info->ssi_errno; + si.si_code = info->ssi_code; + + /* Convert the minimal set of fields defined by POSIX. + * Positive si_code values are reserved for kernel-generated + * signals, where the valid siginfo fields are determined by + * the signal number. But according to POSIX, it is unspecified + * whether SI_USER and SI_QUEUE have values less than or equal to + * zero. + */ + if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE || + info->ssi_code <= 0) { + /* SIGTERM, etc. */ + si.si_pid = info->ssi_pid; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE || + info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) { + si.si_addr = (void *)(uintptr_t)info->ssi_addr; + } else if (info->ssi_signo == SIGCHLD) { + si.si_pid = info->ssi_pid; + si.si_status = info->ssi_status; + si.si_uid = info->ssi_uid; + } else if (info->ssi_signo == SIGIO) { + si.si_band = info->ssi_band; + } + action->sa_sigaction(info->ssi_signo, &si, NULL); +} @@ -227,6 +227,7 @@ static struct { { .driver = "ide-hd", .flag = &default_cdrom }, { .driver = "ide-drive", .flag = &default_cdrom }, { .driver = "scsi-cd", .flag = &default_cdrom }, + { .driver = "scsi-hd", .flag = &default_cdrom }, { .driver = "virtio-serial-pci", .flag = &default_virtcon }, { .driver = "virtio-serial", .flag = &default_virtcon }, { .driver = "VGA", .flag = &default_vga }, @@ -1717,14 +1718,14 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) } if (info) { - if (info->type == GUEST_PANIC_INFORMATION_KIND_HYPER_V) { + if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) { qemu_log_mask(LOG_GUEST_ERROR, "HV crash parameters: (%#"PRIx64 " %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n", - info->u.hyper_v.data->arg1, - info->u.hyper_v.data->arg2, - info->u.hyper_v.data->arg3, - info->u.hyper_v.data->arg4, - info->u.hyper_v.data->arg5); + info->u.hyper_v.arg1, + info->u.hyper_v.arg2, + info->u.hyper_v.arg3, + info->u.hyper_v.arg4, + info->u.hyper_v.arg5); } qapi_free_GuestPanicInformation(info); } |