diff options
326 files changed, 9446 insertions, 4125 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 420505e995..355982b623 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1232,6 +1232,15 @@ M: Samuel Thibault <samuel.thibault@ens-lyon.org> S: Maintained F: backends/baum.c +Command line option argument parsing +M: Markus Armbruster <armbru@redhat.com> +S: Supported +F: include/qemu/option.h +F: tests/test-keyval.c +F: tests/test-qemu-opts.c +F: util/keyval.c +F: util/qemu-option.c + Coverity model M: Markus Armbruster <armbru@redhat.com> S: Supported @@ -1366,7 +1375,9 @@ X: include/qapi/qmp/ F: include/qapi/qmp/dispatch.h F: tests/qapi-schema/ F: tests/test-*-visitor.c +F: tests/test-qapi-*.c F: tests/test-qmp-*.c +F: tests/test-visitor-serialization.c F: scripts/qapi* F: docs/qapi* T: git git://repo.or.cz/qemu/armbru.git qapi-next @@ -1807,8 +1818,8 @@ S: Supported F: tests/image-fuzzer/ Replication -M: Wen Congyang <wency@cn.fujitsu.com> -M: Changlong Xie <xiecl.fnst@cn.fujitsu.com> +M: Wen Congyang <wencongyang2@huawei.com> +M: Xie Changlong <xiechanglong.d@gmail.com> S: Supported F: replication* F: block/replication.c @@ -346,7 +346,7 @@ dtc/%: mkdir -p $@ $(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \ - $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y) + $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS)) # Only keep -O and -g cflags @@ -366,11 +366,11 @@ Makefile: $(version-obj-y) # Build libraries libqemustub.a: $(stub-obj-y) -libqemuutil.a: $(util-obj-y) +libqemuutil.a: $(util-obj-y) $(trace-obj-y) ###################################################################### -COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a +COMMON_LDADDS = libqemuutil.a libqemustub.a qemu-img.o: qemu-img-cmds.h @@ -392,7 +392,6 @@ qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated gen-out-type = $(subst .,-,$(suffix $@)) qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py -qapi-py += 
$(SRC_PATH)/scripts/qapi2texi.py qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) @@ -701,10 +700,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") -docs/qemu-qmp-qapi.texi: $(qapi-modules) $(qapi-py) +docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) + +docs/qemu-qmp-qapi.texi: $(qapi-modules) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") -docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py) +docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@") qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi diff --git a/Makefile.target b/Makefile.target index a535980110..465a633367 100644 --- a/Makefile.target +++ b/Makefile.target @@ -182,8 +182,7 @@ dummy := $(call unnest-vars,.., \ qom-obj-y \ io-obj-y \ common-obj-y \ - common-obj-m \ - trace-obj-y) + common-obj-m) target-obj-y := $(target-obj-y-save) all-obj-y += $(common-obj-y) all-obj-y += $(target-obj-y) @@ -195,7 +194,7 @@ all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y) $(QEMU_PROG_BUILD): config-devices.mak -COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a +COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a # build either PROG or PROGW $(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS) @@ -1 +1 @@ -2.8.91 +2.9.50 diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c index 82a068e792..657c0ba2f3 100644 --- a/backends/cryptodev-builtin.c +++ b/backends/cryptodev-builtin.c @@ -320,10 +320,12 @@ static int cryptodev_builtin_sym_operation( sess = builtin->sessions[op_info->session_id]; - ret = 
qcrypto_cipher_setiv(sess->cipher, op_info->iv, - op_info->iv_len, errp); - if (ret < 0) { - return -VIRTIO_CRYPTO_ERR; + if (op_info->iv_len > 0) { + ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv, + op_info->iv_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } } if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) { @@ -359,8 +361,6 @@ static void cryptodev_builtin_cleanup( } } - assert(queues == 1); - for (i = 0; i < queues; i++) { cc = backend->conf.peers.ccs[i]; if (cc) { diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 42efb2f28a..fc4ef46d11 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) #ifndef CONFIG_LINUX error_setg(errp, "-mem-path not supported on this host"); #else - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { gchar *path; backend->force_prealloc = mem_prealloc; path = object_get_canonical_path(OBJECT(backend)); @@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp) HostMemoryBackend *backend = MEMORY_BACKEND(o); HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); - if (memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(errp, "cannot change property value"); return; } @@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp) HostMemoryBackend *backend = MEMORY_BACKEND(o); HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); - if (memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(errp, "cannot change property value"); return; } diff --git a/backends/hostmem.c b/backends/hostmem.c index 162c2187d8..4606b73849 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name, Error *local_err = NULL; uint64_t value; - if 
(memory_region_size(&backend->mr)) { + if (host_memory_backend_mr_inited(backend)) { error_setg(&local_err, "cannot change property value"); goto out; } @@ -64,14 +64,6 @@ out: error_propagate(errp, local_err); } -static uint16List **host_memory_append_node(uint16List **node, - unsigned long value) -{ - *node = g_malloc0(sizeof(**node)); - (*node)->value = value; - return &(*node)->next; -} - static void host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -82,23 +74,25 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, unsigned long value; value = find_first_bit(backend->host_nodes, MAX_NODES); - - node = host_memory_append_node(node, value); - if (value == MAX_NODES) { - goto out; + return; } + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; + do { value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1); if (value == MAX_NODES) { break; } - node = host_memory_append_node(node, value); + *node = g_malloc0(sizeof(**node)); + (*node)->value = value; + node = &(*node)->next; } while (true); -out: visit_type_uint16List(v, name, &host_nodes, errp); } @@ -152,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->merge = value; return; } @@ -178,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->dump = value; return; } @@ -214,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, } } - if (!memory_region_size(&backend->mr)) { + if (!host_memory_backend_mr_inited(backend)) { backend->prealloc = value; return; } @@ -243,10 +237,19 @@ 
static void host_memory_backend_init(Object *obj) backend->prealloc = mem_prealloc; } +bool host_memory_backend_mr_inited(HostMemoryBackend *backend) +{ + /* + * NOTE: We forbid zero-length memory backend, so here zero means + * "we haven't inited the backend memory region yet". + */ + return memory_region_size(&backend->mr) != 0; +} + MemoryRegion * host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp) { - return memory_region_size(&backend->mr) ? &backend->mr : NULL; + return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL; } void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped) @@ -192,6 +192,43 @@ void path_combine(char *dest, int dest_size, } } +bool bdrv_is_read_only(BlockDriverState *bs) +{ + return bs->read_only; +} + +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) +{ + /* Do not set read_only if copy_on_read is enabled */ + if (bs->copy_on_read && read_only) { + error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled", + bdrv_get_device_or_node_name(bs)); + return -EINVAL; + } + + /* Do not clear read_only if it is prohibited */ + if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) { + error_setg(errp, "Node '%s' is read only", + bdrv_get_device_or_node_name(bs)); + return -EPERM; + } + + return 0; +} + +int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp) +{ + int ret = 0; + + ret = bdrv_can_set_read_only(bs, read_only, errp); + if (ret < 0) { + return ret; + } + + bs->read_only = read_only; + return 0; +} + void bdrv_get_full_backing_filename_from_filename(const char *backed, const char *backing, char *dest, size_t sz, @@ -1157,6 +1194,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, if (file != NULL) { filename = blk_bs(file)->filename; } else { + /* + * Caution: while qdict_get_try_str() is fine, getting + * non-string types would require more care. 
When @options + * come from -blockdev or blockdev_add, its members are typed + * according to the QAPI schema, but when they come from + * -drive, they're all QString. + */ filename = qdict_get_try_str(options, "filename"); } @@ -1324,6 +1368,13 @@ static int bdrv_fill_options(QDict **options, const char *filename, BlockDriver *drv = NULL; Error *local_err = NULL; + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ drvname = qdict_get_try_str(*options, "driver"); if (drvname) { drv = bdrv_find_format(drvname); @@ -1358,6 +1409,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, } /* Find the right block driver */ + /* See cautionary note on accessing @options above */ filename = qdict_get_try_str(*options, "filename"); if (!drvname && protocol) { @@ -1737,6 +1789,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child, { BlockDriverState *old_bs = child->bs; + if (old_bs && new_bs) { + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } if (old_bs) { if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); @@ -1837,6 +1892,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); + assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, perm, shared_perm, &perm, &shared_perm); @@ -1987,6 +2043,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, qdict_extract_subqdict(parent_options, &options, bdref_key_dot); g_free(bdref_key_dot); + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. 
When @parent_options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ reference = qdict_get_try_str(parent_options, bdref_key); if (reference || qdict_haskey(options, "file.filename")) { backing_filename[0] = '\0'; @@ -2059,6 +2122,13 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, qdict_extract_subqdict(options, &image_options, bdref_key_dot); g_free(bdref_key_dot); + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from + * -blockdev or blockdev_add, its members are typed according to + * the QAPI schema, but when they come from -drive, they're all + * QString. + */ reference = qdict_get_try_str(options, bdref_key); if (!filename && !reference && !qdict_size(image_options)) { if (!allow_none) { @@ -2274,9 +2344,13 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } - /* Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. - * FIXME: we're parsing the QDict to avoid having to create a - * QemuOpts just for this, but neither option is optimal. */ + /* + * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. + * Caution: getting a boolean member of @options requires care. + * When @options come from -blockdev or blockdev_add, members are + * typed according to the QAPI schema, but when they come from + * -drive, they're all QString. 
+ */ if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); @@ -2298,6 +2372,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, options = qdict_clone_shallow(options); /* Find the right image format driver */ + /* See cautionary note on accessing @options above */ drvname = qdict_get_try_str(options, "driver"); if (drvname) { drv = bdrv_find_format(drvname); @@ -2309,6 +2384,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, assert(drvname || !(flags & BDRV_O_PROTOCOL)); + /* See cautionary note on accessing @options above */ backing = qdict_get_try_str(options, "backing"); if (backing && *backing == '\0') { flags |= BDRV_O_NO_BACKING; @@ -2713,6 +2789,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, BlockDriver *drv; QemuOpts *opts; const char *value; + bool read_only; assert(reopen_state != NULL); assert(reopen_state->bs->drv != NULL); @@ -2741,12 +2818,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, qdict_put(reopen_state->options, "driver", qstring_from_str(value)); } - /* if we are to stay read-only, do not allow permission change - * to r/w */ - if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) && - reopen_state->flags & BDRV_O_RDWR) { - error_setg(errp, "Node '%s' is read only", - bdrv_get_device_or_node_name(reopen_state->bs)); + /* If we are to stay read-only, do not allow permission change + * to r/w. 
Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is + * not set, or if the BDS still has copy_on_read enabled */ + read_only = !(reopen_state->flags & BDRV_O_RDWR); + ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err); + if (local_err) { + error_propagate(errp, local_err); goto error; } @@ -2787,6 +2865,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, do { QString *new_obj = qobject_to_qstring(entry->value); const char *new = qstring_get_str(new_obj); + /* + * Caution: while qdict_get_try_str() is fine, getting + * non-string types would require more care. When + * bs->options come from -blockdev or blockdev_add, its + * members are typed according to the QAPI schema, but + * when they come from -drive, they're all QString. + */ const char *old = qdict_get_try_str(reopen_state->bs->options, entry->key); @@ -3228,7 +3313,11 @@ int bdrv_truncate(BdrvChild *child, int64_t offset) BlockDriver *drv = bs->drv; int ret; - assert(child->perm & BLK_PERM_RESIZE); + /* FIXME: Some format block drivers use this function instead of implicitly + * growing their file by writing beyond its end. + * See bdrv_aligned_pwritev() for an explanation why we currently + * cannot assert this permission in that case. */ + // assert(child->perm & BLK_PERM_RESIZE); if (!drv) return -ENOMEDIUM; @@ -3305,11 +3394,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) *nb_sectors_ptr = nb_sectors < 0 ? 
0 : nb_sectors; } -bool bdrv_is_read_only(BlockDriverState *bs) -{ - return bs->read_only; -} - bool bdrv_is_sg(BlockDriverState *bs) { return bs->sg; @@ -4111,8 +4195,8 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, - Error **errp, bool quiet) + char *options, uint64_t img_size, int flags, bool quiet, + Error **errp) { QemuOptsList *create_opts = NULL; QemuOpts *opts = NULL; @@ -4278,6 +4362,11 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs) return bs->aio_context; } +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co) +{ + aio_co_enter(bdrv_get_aio_context(bs), co); +} + static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) { QLIST_REMOVE(ban, list); @@ -4350,11 +4439,12 @@ void bdrv_attach_aio_context(BlockDriverState *bs, void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) { - AioContext *ctx; + AioContext *ctx = bdrv_get_aio_context(bs); + aio_disable_external(ctx); + bdrv_parent_drained_begin(bs); bdrv_drain(bs); /* ensure there are no in-flight requests */ - ctx = bdrv_get_aio_context(bs); while (aio_poll(ctx, false)) { /* wait for all bottom halves to execute */ } @@ -4366,6 +4456,8 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) */ aio_context_acquire(new_context); bdrv_attach_aio_context(bs, new_context); + bdrv_parent_drained_end(bs); + aio_enable_external(ctx); aio_context_release(new_context); } diff --git a/block/Makefile.objs b/block/Makefile.objs index de96f8ee80..ea955302c8 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -19,6 +19,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o +block-obj-$(CONFIG_VXHS) += vxhs.o block-obj-$(CONFIG_LIBSSH2) += ssh.o block-obj-y += accounting.o 
dirty-bitmap.o block-obj-y += write-threshold.o @@ -38,6 +39,7 @@ rbd.o-cflags := $(RBD_CFLAGS) rbd.o-libs := $(RBD_LIBS) gluster.o-cflags := $(GLUSTERFS_CFLAGS) gluster.o-libs := $(GLUSTERFS_LIBS) +vxhs.o-libs := $(VXHS_LIBS) ssh.o-cflags := $(LIBSSH2_CFLAGS) ssh.o-libs := $(LIBSSH2_LIBS) block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o diff --git a/block/block-backend.c b/block/block-backend.c index 5742c09c2c..7405024e08 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -61,10 +61,13 @@ struct BlockBackend { uint64_t perm; uint64_t shared_perm; + bool disable_perm; bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; + + int quiesce_counter; }; typedef struct BlockBackendAIOCB { @@ -228,6 +231,9 @@ static void blk_delete(BlockBackend *blk) assert(!blk->refcnt); assert(!blk->name); assert(!blk->dev); + if (blk->public.throttle_state) { + blk_io_limits_disable(blk); + } if (blk->root) { blk_remove_bs(blk); } @@ -576,7 +582,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, { int ret; - if (blk->root) { + if (blk->root && !blk->disable_perm) { ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); if (ret < 0) { return ret; @@ -595,15 +601,52 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) *shared_perm = blk->shared_perm; } +/* + * Notifies the user of all BlockBackends that migration has completed. qdev + * devices can tighten their permissions in response (specifically revoke + * shared write permissions that we needed for storage migration). + * + * If an error is returned, the VM cannot be allowed to be resumed. 
+ */ +void blk_resume_after_migration(Error **errp) +{ + BlockBackend *blk; + Error *local_err = NULL; + + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { + if (!blk->disable_perm) { + continue; + } + + blk->disable_perm = false; + + blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk->disable_perm = true; + return; + } + } +} + static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { return -EBUSY; } + + /* While migration is still incoming, we don't need to apply the + * permissions of guest device BlockBackends. We might still have a block + * job or NBD server writing to the image for storage migration. */ + if (runstate_check(RUN_STATE_INMIGRATE)) { + blk->disable_perm = true; + } + blk_ref(blk); blk->dev = dev; blk->legacy_dev = false; blk_iostatus_reset(blk); + return 0; } @@ -699,12 +742,17 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque) { /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep - * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops - * is set. */ + * it that way, so we can assume blk->dev, if present, is a DeviceState if + * blk->dev_ops is set. Non-device users may use dev_ops without device. */ assert(!blk->legacy_dev); blk->dev_ops = ops; blk->dev_opaque = opaque; + + /* Are we currently quiesced? Should we enforce this right now? 
*/ + if (blk->quiesce_counter && ops->drained_begin) { + ops->drained_begin(opaque); + } } /* @@ -1000,7 +1048,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, co_entry(&rwco); } else { Coroutine *co = qemu_coroutine_create(co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(blk_bs(blk), co); BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); } @@ -1107,7 +1155,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->has_returned = false; co = qemu_coroutine_create(co_entry, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(blk_bs(blk), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { @@ -1870,6 +1918,12 @@ static void blk_root_drained_begin(BdrvChild *child) { BlockBackend *blk = child->opaque; + if (++blk->quiesce_counter == 1) { + if (blk->dev_ops && blk->dev_ops->drained_begin) { + blk->dev_ops->drained_begin(blk->dev_opaque); + } + } + /* Note that blk->root may not be accessible here yet if we are just * attaching to a BlockDriverState that is drained. Use child instead. 
*/ @@ -1881,7 +1935,14 @@ static void blk_root_drained_begin(BdrvChild *child) static void blk_root_drained_end(BdrvChild *child) { BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); assert(blk->public.io_limits_disabled); --blk->public.io_limits_disabled; + + if (--blk->quiesce_counter == 0) { + if (blk->dev_ops && blk->dev_ops->drained_end) { + blk->dev_ops->drained_end(blk->dev_opaque); + } + } } diff --git a/block/bochs.c b/block/bochs.c index 516da56c3b..a759b6eff0 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -110,7 +110,10 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - bs->read_only = true; /* no write support yet */ + ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */ + if (ret < 0) { + return ret; + } ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); if (ret < 0) { diff --git a/block/cloop.c b/block/cloop.c index a6c7b9dbe6..d6597fcf78 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -72,7 +72,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - bs->read_only = true; + ret = bdrv_set_read_only(bs, true, errp); + if (ret < 0) { + return ret; + } /* read header */ ret = bdrv_pread(bs->file, 128, &s->block_size, 4); diff --git a/block/commit.c b/block/commit.c index 28324820a4..91d2c344f6 100644 --- a/block/commit.c +++ b/block/commit.c @@ -335,6 +335,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (commit_top_bs == NULL) { goto fail; } + commit_top_bs->total_sectors = top->total_sectors; + bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top)); bdrv_set_backing_hd(commit_top_bs, top, &local_err); if (local_err) { @@ -482,6 +484,7 @@ int bdrv_commit(BlockDriverState *bs) error_report_err(local_err); goto ro_cleanup; } + bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs)); bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); bdrv_set_backing_hd(bs, commit_top_bs, 
&error_abort); diff --git a/block/crypto.c b/block/crypto.c index 4a2038888d..34549b28a5 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -56,11 +56,11 @@ static int block_crypto_probe_generic(QCryptoBlockFormat format, static ssize_t block_crypto_read_func(QCryptoBlock *block, + void *opaque, size_t offset, uint8_t *buf, size_t buflen, - Error **errp, - void *opaque) + Error **errp) { BlockDriverState *bs = opaque; ssize_t ret; @@ -83,11 +83,11 @@ struct BlockCryptoCreateData { static ssize_t block_crypto_write_func(QCryptoBlock *block, + void *opaque, size_t offset, const uint8_t *buf, size_t buflen, - Error **errp, - void *opaque) + Error **errp) { struct BlockCryptoCreateData *data = opaque; ssize_t ret; @@ -102,9 +102,9 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block, static ssize_t block_crypto_init_func(QCryptoBlock *block, + void *opaque, size_t headerlen, - Error **errp, - void *opaque) + Error **errp) { struct BlockCryptoCreateData *data = opaque; int ret; diff --git a/block/curl.c b/block/curl.c index 34dbd335f4..2708d57c2f 100644 --- a/block/curl.c +++ b/block/curl.c @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, const char *cookie; double d; const char *secretid; + const char *protocol_delimiter; static int inited = 0; @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, goto out_noclean; } + if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) || + !strstart(protocol_delimiter, "://", NULL)) + { + error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not " + "start with '%s://')", bs->drv->protocol_name, file, + bs->drv->protocol_name); + goto out_noclean; + } + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); diff --git a/block/dmg.c b/block/dmg.c index a7d25fc47b..900ae5a678 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -419,8 +419,12 @@ static int 
dmg_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } + ret = bdrv_set_read_only(bs, true, errp); + if (ret < 0) { + return ret; + } + block_module_load_one("dmg-bz2"); - bs->read_only = true; s->n_chunks = 0; s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL; diff --git a/block/file-posix.c b/block/file-posix.c index 53febd3767..0c4896876e 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -144,6 +144,7 @@ typedef struct BDRVRawState { bool has_write_zeroes:1; bool discard_zeroes:1; bool use_linux_aio:1; + bool page_cache_inconsistent:1; bool has_fallocate; bool needs_alignment; } BDRVRawState; @@ -219,28 +220,28 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p) { unsigned int sector_size; bool success = false; + int i; errno = ENOTSUP; - - /* Try a few ioctls to get the right size */ + static const unsigned long ioctl_list[] = { #ifdef BLKSSZGET - if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) { - *sector_size_p = sector_size; - success = true; - } + BLKSSZGET, #endif #ifdef DKIOCGETBLOCKSIZE - if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) { - *sector_size_p = sector_size; - success = true; - } + DKIOCGETBLOCKSIZE, #endif #ifdef DIOCGSECTORSIZE - if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) { - *sector_size_p = sector_size; - success = true; - } + DIOCGSECTORSIZE, #endif + }; + + /* Try a few ioctls to get the right size */ + for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) { + if (ioctl(fd, ioctl_list[i], &sector_size) >= 0) { + *sector_size_p = sector_size; + success = true; + } + } return success ? 
0 : -errno; } @@ -824,10 +825,31 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb) { + BDRVRawState *s = aiocb->bs->opaque; int ret; + if (s->page_cache_inconsistent) { + return -EIO; + } + ret = qemu_fdatasync(aiocb->aio_fildes); if (ret == -1) { + /* There is no clear definition of the semantics of a failing fsync(), + * so we may have to assume the worst. The sad truth is that this + * assumption is correct for Linux. Some pages are now probably marked + * clean in the page cache even though they are inconsistent with the + * on-disk contents. The next fdatasync() call would succeed, but no + * further writeback attempt will be made. We can't get back to a state + * in which we know what is on disk (we would have to rewrite + * everything that was touched since the last fdatasync() at least), so + * make bdrv_flush() fail permanently. Given that the behaviour isn't + * really defined, I have little hope that other OSes are doing better. + * + * Obviously, this doesn't affect O_DIRECT, which bypasses the page + * cache. */ + if ((s->open_flags & O_DIRECT) == 0) { + s->page_cache_inconsistent = true; + } return -errno; } return 0; @@ -2171,6 +2193,12 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, int ret; #if defined(__APPLE__) && defined(__MACH__) + /* + * Caution: while qdict_get_str() is fine, getting non-string types + * would require more care. When @options come from -blockdev or + * blockdev_add, its members are typed according to the QAPI + * schema, but when they come from -drive, they're all QString. 
+ */ const char *filename = qdict_get_str(options, "filename"); char bsd_path[MAXPATHLEN] = ""; bool error_occurred = false; diff --git a/block/gluster.c b/block/gluster.c index a577daef10..cf29b5f9a4 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -412,10 +412,12 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, glfs_set_preopened(gconf->volume, glfs); for (server = gconf->server; server; server = server->next) { - if (server->value->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) { + switch (server->value->type) { + case SOCKET_ADDRESS_FLAT_TYPE_UNIX: ret = glfs_set_volfile_server(glfs, "unix", server->value->u.q_unix.path, 0); - } else { + break; + case SOCKET_ADDRESS_FLAT_TYPE_INET: if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 || port > 65535) { error_setg(errp, "'%s' is not a valid port number", @@ -426,6 +428,11 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, ret = glfs_set_volfile_server(glfs, "tcp", server->value->u.inet.host, (int)port); + break; + case SOCKET_ADDRESS_FLAT_TYPE_VSOCK: + case SOCKET_ADDRESS_FLAT_TYPE_FD: + default: + abort(); } if (ret < 0) { @@ -487,7 +494,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf, char *str = NULL; const char *ptr; size_t num_servers; - int i; + int i, type; /* create opts info from runtime_json_opts list */ opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort); @@ -539,16 +546,17 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf, if (!strcmp(ptr, "tcp")) { ptr = "inet"; /* accept legacy "tcp" */ } - gsconf->type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr, - SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, - &local_err); - if (local_err) { - error_append_hint(&local_err, - "Parameter '%s' may be 'inet' or 'unix'\n", - GLUSTER_OPT_TYPE); + type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr, + SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, NULL); + if (type != SOCKET_ADDRESS_FLAT_TYPE_INET + && type != 
SOCKET_ADDRESS_FLAT_TYPE_UNIX) { + error_setg(&local_err, + "Parameter '%s' may be 'inet' or 'unix'", + GLUSTER_OPT_TYPE); error_append_hint(&local_err, GERR_INDEX_HINT, i); goto out; } + gsconf->type = type; qemu_opts_del(opts); if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) { diff --git a/block/io.c b/block/io.c index 2709a7007f..a7142e00e8 100644 --- a/block/io.c +++ b/block/io.c @@ -44,7 +44,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags); -static void bdrv_parent_drained_begin(BlockDriverState *bs) +void bdrv_parent_drained_begin(BlockDriverState *bs) { BdrvChild *c; @@ -55,7 +55,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs) } } -static void bdrv_parent_drained_end(BlockDriverState *bs) +void bdrv_parent_drained_end(BlockDriverState *bs) { BdrvChild *c; @@ -158,7 +158,7 @@ bool bdrv_requests_pending(BlockDriverState *bs) static bool bdrv_drain_recurse(BlockDriverState *bs) { - BdrvChild *child; + BdrvChild *child, *tmp; bool waited; waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); @@ -167,8 +167,25 @@ static bool bdrv_drain_recurse(BlockDriverState *bs) bs->drv->bdrv_drain(bs); } - QLIST_FOREACH(child, &bs->children, next) { - waited |= bdrv_drain_recurse(child->bs); + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { + BlockDriverState *bs = child->bs; + bool in_main_loop = + qemu_get_current_aio_context() == qemu_get_aio_context(); + assert(bs->refcnt > 0); + if (in_main_loop) { + /* In case the recursive bdrv_drain_recurse processes a + * block_job_defer_to_main_loop BH and modifies the graph, + * let's hold a reference to bs until we are done. + * + * IOThread doesn't have such a BH, and it is not safe to call + * bdrv_unref without BQL, so skip doing it there. 
+ */ + bdrv_ref(bs); + } + waited |= bdrv_drain_recurse(bs); + if (in_main_loop) { + bdrv_unref(bs); + } } return waited; @@ -616,7 +633,7 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, bdrv_rw_co_entry(&rwco); } else { co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(child->bs, co); BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); } return rwco.ret; @@ -945,7 +962,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, size_t skip_bytes; int ret; - assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* FIXME We cannot require callers to have write permissions when all they + * are doing is a read request. If we did things right, write permissions + * would be obtained anyway, but internally by the copy-on-read code. As + * long as it is implemented here rather than in a separate filter driver, + * the copy-on-read code doesn't have its own BdrvChild, however, for which + * it could request permissions. Therefore we have to bypass the permission + * system for the moment. */ + // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. @@ -1338,8 +1362,16 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); - assert(child->perm & BLK_PERM_WRITE); - assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); + /* FIXME: Block migration uses the BlockBackend of the guest device at a + * point when it has not yet taken write permissions. This will be + * fixed by a future patch, but for now we have to bypass this + * assertion for block migration to work.
*/ + // assert(child->perm & BLK_PERM_WRITE); + /* FIXME: Because of the above, we also cannot guarantee that all format + * BDS take the BLK_PERM_RESIZE permission on their file BDS, since + * they are not obligated to do so if they do not have any parent + * that has taken the permission to write to them. */ + // assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); @@ -1873,7 +1905,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, } else { co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry, &data); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !data.done); } return data.ret; @@ -1999,7 +2031,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos, }; Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); while (data.ret == -EINPROGRESS) { aio_poll(bdrv_get_aio_context(bs), true); } @@ -2216,7 +2248,7 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, acb->is_write = is_write; co = qemu_coroutine_create(bdrv_co_do_rw, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(child->bs, co); bdrv_co_maybe_schedule_bh(acb); return &acb->common; @@ -2247,7 +2279,7 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, acb->req.error = -EINPROGRESS; co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); bdrv_co_maybe_schedule_bh(acb); return &acb->common; @@ -2271,16 +2303,17 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { - int ret; + int current_gen; + int ret = 0; + + bdrv_inc_in_flight(bs); if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { - return 0; + goto early_exit; } - bdrv_inc_in_flight(bs); - - int current_gen = bs->write_gen; + current_gen = 
bs->write_gen; /* Wait until any previous flushes are completed */ while (bs->active_flush_req) { @@ -2363,6 +2396,7 @@ out: /* Return value is ignored - it's ok if wait queue is empty */ qemu_co_queue_next(&bs->flush_queue); +early_exit: bdrv_dec_in_flight(bs); return ret; } @@ -2380,7 +2414,7 @@ int bdrv_flush(BlockDriverState *bs) bdrv_flush_co_entry(&flush_co); } else { co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE); } @@ -2527,7 +2561,7 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) bdrv_pdiscard_co_entry(&rwco); } else { co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE); } diff --git a/block/iscsi.c b/block/iscsi.c index 75d890538e..42fb0b019c 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -103,7 +103,6 @@ typedef struct IscsiTask { typedef struct IscsiAIOCB { BlockAIOCB common; - QEMUIOVector *qiov; QEMUBH *bh; IscsiLun *iscsilun; struct scsi_task *task; @@ -2093,6 +2092,7 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp) BlockDriverState *bs; IscsiLun *iscsilun = NULL; QDict *bs_options; + Error *local_err = NULL; bs = bdrv_new(); @@ -2103,8 +2103,13 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp) iscsilun = bs->opaque; bs_options = qdict_new(); - qdict_put(bs_options, "filename", qstring_from_str(filename)); - ret = iscsi_open(bs, bs_options, 0, NULL); + iscsi_parse_filename(filename, bs_options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + } else { + ret = iscsi_open(bs, bs_options, 0, NULL); + } QDECREF(bs_options); if (ret != 0) { diff --git a/block/mirror.c b/block/mirror.c index ca4baa510a..9f5eb692fd 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -634,7 +634,8 @@ static int coroutine_fn 
mirror_dirty_init(MirrorBlockJob *s) } if (s->in_flight >= MAX_IN_FLIGHT) { - trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1); + trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, + s->in_flight); mirror_wait_for_io(s); continue; } @@ -809,7 +810,7 @@ static void coroutine_fn mirror_run(void *opaque) s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) { if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { - trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); + trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); mirror_wait_for_io(s); continue; } else if (cnt != 0) { @@ -1111,10 +1112,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, BlockdevOnError on_target_error, bool unmap, BlockCompletionFunc *cb, - void *opaque, Error **errp, + void *opaque, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete, const char *filter_node_name) + bool auto_complete, const char *filter_node_name, + Error **errp) { MirrorBlockJob *s; BlockDriverState *mirror_top_bs; @@ -1147,9 +1149,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } mirror_top_bs->total_sectors = bs->total_sectors; + bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep - * it alive until block_job_create() even if bs has no parent. */ + * it alive until block_job_create() succeeds even if bs has no parent. 
*/ bdrv_ref(mirror_top_bs); bdrv_drained_begin(bs); bdrv_append(mirror_top_bs, bs, &local_err); @@ -1167,10 +1170,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, creation_flags, cb, opaque, errp); - bdrv_unref(mirror_top_bs); if (!s) { goto fail; } + /* The block job now has a reference to this node */ + bdrv_unref(mirror_top_bs); + s->source = bs; s->mirror_top_bs = mirror_top_bs; @@ -1241,6 +1246,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, fail: if (s) { + /* Make sure this BDS does not go away until we have completed the graph + * changes below */ + bdrv_ref(mirror_top_bs); + g_free(s->replaces); blk_unref(s->target); block_job_unref(&s->common); @@ -1249,6 +1258,8 @@ fail: bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); + + bdrv_unref(mirror_top_bs); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1270,17 +1281,17 @@ void mirror_start(const char *job_id, BlockDriverState *bs, base = mode == MIRROR_SYNC_MODE_TOP ? 
backing_bs(bs) : NULL; mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, - on_source_error, on_target_error, unmap, NULL, NULL, errp, + on_source_error, on_target_error, unmap, NULL, NULL, &mirror_job_driver, is_none_mode, base, false, - filter_node_name); + filter_node_name, errp); } void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, const char *filter_node_name, - BlockCompletionFunc *cb, void *opaque, Error **errp, - bool auto_complete) + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp) { int orig_base_flags; Error *local_err = NULL; @@ -1293,9 +1304,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, - on_error, on_error, true, cb, opaque, &local_err, + on_error, on_error, true, cb, opaque, &commit_active_job_driver, false, base, auto_complete, - filter_node_name); + filter_node_name, &local_err); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/nbd-client.c b/block/nbd-client.c index 0dc12c2d67..1e2952fdae 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -33,17 +33,15 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) -static void nbd_recv_coroutines_enter_all(BlockDriverState *bs) +static void nbd_recv_coroutines_enter_all(NBDClientSession *s) { - NBDClientSession *s = nbd_get_client_session(bs); int i; for (i = 0; i < MAX_NBD_REQUESTS; i++) { if (s->recv_coroutine[i]) { - qemu_coroutine_enter(s->recv_coroutine[i]); + aio_co_wake(s->recv_coroutine[i]); } } - BDRV_POLL_WHILE(bs, s->read_reply_co); } static void nbd_teardown_connection(BlockDriverState *bs) @@ -58,7 +56,7 @@ static void 
nbd_teardown_connection(BlockDriverState *bs) qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); - nbd_recv_coroutines_enter_all(bs); + BDRV_POLL_WHILE(bs, client->read_reply_co); nbd_client_detach_aio_context(bs); object_unref(OBJECT(client->sioc)); @@ -76,7 +74,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) for (;;) { assert(s->reply.handle == 0); ret = nbd_receive_reply(s->ioc, &s->reply); - if (ret < 0) { + if (ret <= 0) { break; } @@ -103,6 +101,8 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque) aio_co_wake(s->recv_coroutine[i]); qemu_coroutine_yield(); } + + nbd_recv_coroutines_enter_all(s); s->read_reply_co = NULL; } diff --git a/block/nbd-client.h b/block/nbd-client.h index 8cdfc92e94..891ba44a20 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -30,8 +30,6 @@ typedef struct NBDClientSession { Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; NBDReply reply; - - bool is_unix; } NBDClientSession; NBDClientSession *nbd_get_client_session(BlockDriverState *bs); diff --git a/block/nbd.c b/block/nbd.c index f478f80b4a..814ab26dce 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -47,7 +47,7 @@ typedef struct BDRVNBDState { NBDClientSession client; /* For nbd_refresh_filename() */ - SocketAddress *saddr; + SocketAddressFlat *saddr; char *export, *tlscredsid; } BDRVNBDState; @@ -95,7 +95,7 @@ static int nbd_parse_uri(const char *filename, QDict *options) goto out; } qdict_put(options, "server.type", qstring_from_str("unix")); - qdict_put(options, "server.data.path", + qdict_put(options, "server.path", qstring_from_str(qp->p[0].value)); } else { QString *host; @@ -116,10 +116,10 @@ static int nbd_parse_uri(const char *filename, QDict *options) } qdict_put(options, "server.type", qstring_from_str("inet")); - qdict_put(options, "server.data.host", host); + qdict_put(options, "server.host", host); port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT); - qdict_put(options, "server.data.port", 
qstring_from_str(port_str)); + qdict_put(options, "server.port", qstring_from_str(port_str)); g_free(port_str); } @@ -197,7 +197,7 @@ static void nbd_parse_filename(const char *filename, QDict *options, /* are we a UNIX or TCP socket? */ if (strstart(host_spec, "unix:", &unixpath)) { qdict_put(options, "server.type", qstring_from_str("unix")); - qdict_put(options, "server.data.path", qstring_from_str(unixpath)); + qdict_put(options, "server.path", qstring_from_str(unixpath)); } else { InetSocketAddress *addr = NULL; @@ -207,8 +207,8 @@ static void nbd_parse_filename(const char *filename, QDict *options, } qdict_put(options, "server.type", qstring_from_str("inet")); - qdict_put(options, "server.data.host", qstring_from_str(addr->host)); - qdict_put(options, "server.data.port", qstring_from_str(addr->port)); + qdict_put(options, "server.host", qstring_from_str(addr->host)); + qdict_put(options, "server.port", qstring_from_str(addr->port)); qapi_free_InetSocketAddress(addr); } @@ -248,20 +248,21 @@ static bool nbd_process_legacy_socket_options(QDict *output_options, } qdict_put(output_options, "server.type", qstring_from_str("unix")); - qdict_put(output_options, "server.data.path", qstring_from_str(path)); + qdict_put(output_options, "server.path", qstring_from_str(path)); } else if (host) { qdict_put(output_options, "server.type", qstring_from_str("inet")); - qdict_put(output_options, "server.data.host", qstring_from_str(host)); - qdict_put(output_options, "server.data.port", + qdict_put(output_options, "server.host", qstring_from_str(host)); + qdict_put(output_options, "server.port", qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT))); } return true; } -static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp) +static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options, + Error **errp) { - SocketAddress *saddr = NULL; + SocketAddressFlat *saddr = NULL; QDict *addr = NULL; QObject *crumpled_addr = NULL; Visitor *iv = NULL; @@ 
-278,15 +279,21 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp) goto done; } + /* + * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive + * server.type=inet. .to doesn't matter, it's ignored anyway. + * That's because when @options come from -blockdev or + * blockdev_add, members are typed according to the QAPI schema, + * but when they come from -drive, they're all QString. The + * visitor expects the former. + */ iv = qobject_input_visitor_new(crumpled_addr); - visit_type_SocketAddress(iv, NULL, &saddr, &local_err); + visit_type_SocketAddressFlat(iv, NULL, &saddr, &local_err); if (local_err) { error_propagate(errp, local_err); goto done; } - s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; - done: QDECREF(addr); qobject_decref(crumpled_addr); @@ -300,9 +307,10 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs) return &s->client; } -static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, +static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat, Error **errp) { + SocketAddress *saddr = socket_address_crumple(saddr_flat); QIOChannelSocket *sioc; Error *local_err = NULL; @@ -312,7 +320,9 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, qio_channel_socket_connect_sync(sioc, saddr, &local_err); + qapi_free_SocketAddress(saddr); if (local_err) { + object_unref(OBJECT(sioc)); error_propagate(errp, local_err); return NULL; } @@ -403,7 +413,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, goto error; } - /* Translate @host, @port, and @path to a SocketAddress */ + /* Translate @host, @port, and @path to a SocketAddressFlat */ if (!nbd_process_legacy_socket_options(options, opts, errp)) { goto error; } @@ -423,11 +433,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, goto error; } - if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) { + /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or 
AF_INET6 */ + if (s->saddr->type != SOCKET_ADDRESS_FLAT_TYPE_INET) { error_setg(errp, "TLS only supported over IP sockets"); goto error; } - hostname = s->saddr->u.inet.data->host; + hostname = s->saddr->u.inet.host; } /* establish TCP connection, return error if it fails @@ -450,7 +461,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, object_unref(OBJECT(tlscreds)); } if (ret < 0) { - qapi_free_SocketAddress(s->saddr); + qapi_free_SocketAddressFlat(s->saddr); g_free(s->export); g_free(s->tlscredsid); } @@ -476,7 +487,7 @@ static void nbd_close(BlockDriverState *bs) nbd_client_close(bs); - qapi_free_SocketAddress(s->saddr); + qapi_free_SocketAddressFlat(s->saddr); g_free(s->export); g_free(s->tlscredsid); } @@ -507,15 +518,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) Visitor *ov; const char *host = NULL, *port = NULL, *path = NULL; - if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) { - const InetSocketAddress *inet = s->saddr->u.inet.data; + if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_INET) { + const InetSocketAddress *inet = &s->saddr->u.inet; if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) { host = inet->host; port = inet->port; } - } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) { - path = s->saddr->u.q_unix.data->path; - } + } else if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) { + path = s->saddr->u.q_unix.path; + } /* else can't represent as pseudo-filename */ qdict_put(opts, "driver", qstring_from_str("nbd")); @@ -534,7 +545,7 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) } ov = qobject_output_visitor_new(&saddr_qdict); - visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort); + visit_type_SocketAddressFlat(ov, NULL, &s->saddr, &error_abort); visit_complete(ov, &saddr_qdict); visit_free(ov); qdict_put_obj(opts, "server", saddr_qdict); diff --git a/block/nfs.c b/block/nfs.c index 3f43f6e26a..6541dec1fc 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ 
-474,6 +474,13 @@ static NFSServer *nfs_config(QDict *options, Error **errp) goto out; } + /* + * Caution: this works only because all scalar members of + * NFSServer are QString in @crumpled_addr. The visitor expects + * @crumpled_addr to be typed according to the QAPI schema. It + * is when @options come from -blockdev or blockdev_add. But when + * they come from -drive, they're all QString. + */ iv = qobject_input_visitor_new(crumpled_addr); visit_type_NFSServer(iv, NULL, &server, &local_error); if (local_error) { @@ -490,7 +497,7 @@ out: static int64_t nfs_client_open(NFSClient *client, QDict *options, - int flags, Error **errp, int open_flags) + int flags, int open_flags, Error **errp) { int ret = -EINVAL; QemuOpts *opts = NULL; @@ -656,7 +663,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, ret = nfs_client_open(client, options, (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, - errp, bs->open_flags); + bs->open_flags, errp); if (ret < 0) { return ret; } @@ -698,7 +705,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) goto out; } - ret = nfs_client_open(client, options, O_CREAT, errp, 0); + ret = nfs_client_open(client, options, O_CREAT, 0, errp); if (ret < 0) { goto out; } diff --git a/block/parallels.c b/block/parallels.c index 19935e29a9..90acf79687 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -114,7 +114,7 @@ static QemuOptsList parallels_runtime_opts = { .name = PARALLELS_OPT_PREALLOC_SIZE, .type = QEMU_OPT_SIZE, .help = "Preallocation size on image expansion", - .def_value_str = "128MiB", + .def_value_str = "128M", }, { .name = PARALLELS_OPT_PREALLOC_MODE, @@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVParallelsState *s = bs->opaque; - uint32_t idx, to_allocate, i; - int64_t pos, space; + int64_t pos, space, idx, to_allocate, i; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -201,11 
+200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } idx = sector_num / s->tracks; - if (idx >= s->bat_size) { - return -EINVAL; - } - to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; + + /* This function is called only by parallels_co_writev(), which will never + * pass a sector_num at or beyond the end of the image (because the block + * layer never passes such a sector_num to that function). Therefore, idx + * is always below s->bat_size. + * block_status() will limit *pnum so that sector_num + *pnum will not + * exceed the image end. Therefore, idx + to_allocate cannot exceed + * s->bat_size. + * Note that s->bat_size is an unsigned int, therefore idx + to_allocate + * will always fit into a uint32_t. */ + assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); + space = to_allocate * s->tracks; if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret; @@ -687,7 +694,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, if (local_err != NULL) { goto fail_options; } - if (!bdrv_has_zero_init(bs->file->bs) || + + if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) || bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) { s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 78c11d4948..100398c565 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1519,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); - /* Round start up and end down */ - offset = align_offset(offset, s->cluster_size); - end_offset = start_of_cluster(s, end_offset); - - if (offset > end_offset) { - return 0; + /* The caller must cluster-align start; round end down except at EOF */ + assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); + if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { + end_offset = 
start_of_cluster(s, end_offset); } nb_clusters = size_to_clusters(s, end_offset - offset); diff --git a/block/rbd.c b/block/rbd.c index ee13f3d9d3..6471f4fd2b 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -13,14 +13,14 @@ #include "qemu/osdep.h" +#include <rbd/librbd.h> #include "qapi/error.h" #include "qemu/error-report.h" #include "block/block_int.h" #include "crypto/secret.h" #include "qemu/cutils.h" #include "qapi/qmp/qstring.h" - -#include <rbd/librbd.h> +#include "qapi/qmp/qjson.h" /* * When specifying the image filename use: @@ -56,11 +56,6 @@ #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER) -#define RBD_MAX_CONF_NAME_SIZE 128 -#define RBD_MAX_CONF_VAL_SIZE 512 -#define RBD_MAX_CONF_SIZE 1024 -#define RBD_MAX_POOL_NAME_SIZE 128 -#define RBD_MAX_SNAP_NAME_SIZE 128 #define RBD_MAX_SNAPS 100 /* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ @@ -99,43 +94,28 @@ typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; rbd_image_t image; - char name[RBD_MAX_IMAGE_NAME_SIZE]; + char *image_name; char *snap; } BDRVRBDState; -static char *qemu_rbd_next_tok(int max_len, - char *src, char delim, - const char *name, - char **p, Error **errp) +static char *qemu_rbd_next_tok(char *src, char delim, char **p) { - int l; char *end; *p = NULL; - if (delim != '\0') { - for (end = src; *end; ++end) { - if (*end == delim) { - break; - } - if (*end == '\\' && end[1] != '\0') { - end++; - } - } + for (end = src; *end; ++end) { if (*end == delim) { - *p = end + 1; - *end = '\0'; + break; + } + if (*end == '\\' && end[1] != '\0') { + end++; } } - l = strlen(src); - if (l >= max_len) { - error_setg(errp, "%s too long", name); - return NULL; - } else if (l == 0) { - error_setg(errp, "%s too short", name); - return NULL; + if (*end == delim) { + *p = end + 1; + *end = '\0'; } - return src; } @@ -156,26 +136,19 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, Error **errp) { const char *start; - char *p, *buf, *keypairs; + char *p, *buf; + 
QList *keypairs = NULL; char *found_str; - size_t max_keypair_size; - Error *local_err = NULL; if (!strstart(filename, "rbd:", &start)) { error_setg(errp, "File name must start with 'rbd:'"); return; } - max_keypair_size = strlen(start) + 1; buf = g_strdup(start); - keypairs = g_malloc0(max_keypair_size); p = buf; - found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p, - '/', "pool name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, '/', &p); if (!p) { error_setg(errp, "Pool name is required"); goto done; @@ -184,27 +157,15 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qdict_put(options, "pool", qstring_from_str(found_str)); if (strchr(p, '@')) { - found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, - '@', "object name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, '@', &p); qemu_rbd_unescape(found_str); qdict_put(options, "image", qstring_from_str(found_str)); - found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p, - ':', "snap name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(found_str); qdict_put(options, "snapshot", qstring_from_str(found_str)); } else { - found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, - ':', "object name", &p, &local_err); - if (local_err) { - goto done; - } + found_str = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(found_str); qdict_put(options, "image", qstring_from_str(found_str)); } @@ -212,24 +173,11 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, goto done; } - found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '\0', "configuration", &p, &local_err); - if (local_err) { - goto done; - } - - p = found_str; - /* The following are essentially all key/value pairs, and we treat * 'id' and 'conf' a bit special. Key/value pairs may be in any order. 
*/ while (p) { char *name, *value; - name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '=', "conf option name", &p, &local_err); - if (local_err) { - break; - } - + name = qemu_rbd_next_tok(p, '=', &p); if (!p) { error_setg(errp, "conf option %s has no value", name); break; @@ -237,11 +185,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, qemu_rbd_unescape(name); - value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, - ':', "conf option value", &p, &local_err); - if (local_err) { - break; - } + value = qemu_rbd_next_tok(p, ':', &p); qemu_rbd_unescape(value); if (!strcmp(name, "conf")) { @@ -249,36 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, } else if (!strcmp(name, "id")) { qdict_put(options, "user" , qstring_from_str(value)); } else { - /* FIXME: This is pretty ugly, and not the right way to do this. - * These should be contained in a structure, and then - * passed explicitly as individual key/value pairs to - * rados. Consider this legacy code that needs to be - * updated. 
*/ - char *tmp = g_malloc0(max_keypair_size); - /* only use a delimiter if it is not the first keypair found */ - /* These are sets of unknown key/value pairs we'll pass along - * to ceph */ - if (keypairs[0]) { - snprintf(tmp, max_keypair_size, ":%s=%s", name, value); - pstrcat(keypairs, max_keypair_size, tmp); - } else { - snprintf(keypairs, max_keypair_size, "%s=%s", name, value); + /* + * We pass these internally to qemu_rbd_set_keypairs(), so + * we can get away with the simpler list of [ "key1", + * "value1", "key2", "value2" ] rather than a raw dict + * { "key1": "value1", "key2": "value2" } where we can't + * guarantee order, or even a more correct but complex + * [ { "key1": "value1" }, { "key2": "value2" } ] + */ + if (!keypairs) { + keypairs = qlist_new(); } - g_free(tmp); + qlist_append(keypairs, qstring_from_str(name)); + qlist_append(keypairs, qstring_from_str(value)); } } - if (keypairs[0]) { - qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs)); + if (keypairs) { + qdict_put(options, "=keyvalue-pairs", + qobject_to_json(QOBJECT(keypairs))); } - done: - if (local_err) { - error_propagate(errp, local_err); - } g_free(buf); - g_free(keypairs); + QDECREF(keypairs); return; } @@ -302,50 +240,41 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid, return 0; } -static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs, +static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json, Error **errp) { - char *p, *buf; - char *name; - char *value; - Error *local_err = NULL; + QList *keypairs; + QString *name; + QString *value; + const char *key; + size_t remaining; int ret = 0; - buf = g_strdup(keypairs); - p = buf; - - while (p) { - name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, - '=', "conf option name", &p, &local_err); - if (local_err) { - break; - } - - if (!p) { - error_setg(errp, "conf option %s has no value", name); - ret = -EINVAL; - break; - } + if (!keypairs_json) { + return ret; + } + 
keypairs = qobject_to_qlist(qobject_from_json(keypairs_json, + &error_abort)); + remaining = qlist_size(keypairs) / 2; + assert(remaining); - value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, - ':', "conf option value", &p, &local_err); - if (local_err) { - break; - } + while (remaining--) { + name = qobject_to_qstring(qlist_pop(keypairs)); + value = qobject_to_qstring(qlist_pop(keypairs)); + assert(name && value); + key = qstring_get_str(name); - ret = rados_conf_set(cluster, name, value); + ret = rados_conf_set(cluster, key, qstring_get_str(value)); + QDECREF(name); + QDECREF(value); if (ret < 0) { - error_setg_errno(errp, -ret, "invalid conf option %s", name); + error_setg_errno(errp, -ret, "invalid conf option %s", key); ret = -EINVAL; break; } } - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - } - g_free(buf); + QDECREF(keypairs); return ret; } @@ -365,14 +294,14 @@ static QemuOptsList runtime_opts = { .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = "pool", .type = QEMU_OPT_STRING, - .help = "Specification of the rbd image", + .help = "Rados pool name", }, { - .name = "password-secret", + .name = "image", .type = QEMU_OPT_STRING, - .help = "ID of secret providing the password", + .help = "Image name in the pool", }, { .name = "conf", @@ -380,16 +309,6 @@ static QemuOptsList runtime_opts = { .help = "Rados config file location", }, { - .name = "pool", - .type = QEMU_OPT_STRING, - .help = "Rados pool name", - }, - { - .name = "image", - .type = QEMU_OPT_STRING, - .help = "Image name in the pool", - }, - { .name = "snapshot", .type = QEMU_OPT_STRING, .help = "Ceph snapshot name", @@ -400,23 +319,26 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "Rados id name", }, + /* + * server.* extracted manually, see qemu_rbd_mon_host() + */ { - .name = "keyvalue-pairs", - .type = QEMU_OPT_STRING, - .help = "Legacy rados key/value option parameters", - }, - { - .name = 
"host", - .type = QEMU_OPT_STRING, - }, - { - .name = "port", + .name = "password-secret", .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", }, + + /* + * Keys for qemu_rbd_parse_filename(), not in the QAPI schema + */ { - .name = "auth", + /* + * HACK: name starts with '=' so that qemu_opts_parse() + * can't set it + */ + .name = "=keyvalue-pairs", .type = QEMU_OPT_STRING, - .help = "Supported authentication method, either cephx or none", + .help = "Legacy rados key/value option parameters", }, { /* end of list */ } }, @@ -428,12 +350,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) int64_t bytes = 0; int64_t objsize; int obj_order = 0; - const char *pool, *name, *conf, *clientname, *keypairs; + const char *pool, *image_name, *conf, *user, *keypairs; const char *secretid; rados_t cluster; rados_ioctx_t io_ctx; QDict *options = NULL; - QemuOpts *rbd_opts = NULL; int ret = 0; secretid = qemu_opt_get(opts, "password-secret"); @@ -464,21 +385,19 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(rbd_opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto exit; - } - - pool = qemu_opt_get(rbd_opts, "pool"); - conf = qemu_opt_get(rbd_opts, "conf"); - clientname = qemu_opt_get(rbd_opts, "user"); - name = qemu_opt_get(rbd_opts, "image"); - keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs"); + /* + * Caution: while qdict_get_try_str() is fine, getting non-string + * types would require more care. When @options come from -blockdev + * or blockdev_add, its members are typed according to the QAPI + * schema, but when they come from -drive, they're all QString. 
+ */ + pool = qdict_get_try_str(options, "pool"); + conf = qdict_get_try_str(options, "conf"); + user = qdict_get_try_str(options, "user"); + image_name = qdict_get_try_str(options, "image"); + keypairs = qdict_get_try_str(options, "=keyvalue-pairs"); - ret = rados_create(&cluster, clientname); + ret = rados_create(&cluster, user); if (ret < 0) { error_setg_errno(errp, -ret, "error initializing"); goto exit; @@ -515,7 +434,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) goto shutdown; } - ret = rbd_create(io_ctx, name, bytes, &obj_order); + ret = rbd_create(io_ctx, image_name, bytes, &obj_order); if (ret < 0) { error_setg_errno(errp, -ret, "error rbd create"); } @@ -527,7 +446,6 @@ shutdown: exit: QDECREF(options); - qemu_opts_del(rbd_opts); return ret; } @@ -578,91 +496,43 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) qemu_aio_unref(acb); } -#define RBD_MON_HOST 0 -#define RBD_AUTH_SUPPORTED 1 - -static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type, - Error **errp) +static char *qemu_rbd_mon_host(QDict *options, Error **errp) { - int num_entries; - QemuOpts *opts = NULL; - QDict *sub_options; - const char *host; - const char *port; - char *str; - char *rados_str = NULL; - Error *local_err = NULL; + const char **vals = g_new(const char *, qdict_size(options) + 1); + char keybuf[32]; + const char *host, *port; + char *rados_str; int i; - assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED); - - num_entries = qdict_array_entries(options, prefix); - - if (num_entries < 0) { - error_setg(errp, "Parse error on RBD QDict array"); - return NULL; - } - - for (i = 0; i < num_entries; i++) { - char *strbuf = NULL; - const char *value; - char *rados_str_tmp; - - str = g_strdup_printf("%s%d.", prefix, i); - qdict_extract_subqdict(options, &sub_options, str); - g_free(str); - - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, sub_options, &local_err); - 
QDECREF(sub_options); - if (local_err) { - error_propagate(errp, local_err); - g_free(rados_str); + for (i = 0;; i++) { + sprintf(keybuf, "server.%d.host", i); + host = qdict_get_try_str(options, keybuf); + qdict_del(options, keybuf); + sprintf(keybuf, "server.%d.port", i); + port = qdict_get_try_str(options, keybuf); + qdict_del(options, keybuf); + if (!host && !port) { + break; + } + if (!host) { + error_setg(errp, "Parameter server.%d.host is missing", i); rados_str = NULL; - goto exit; + goto out; } - if (type == RBD_MON_HOST) { - host = qemu_opt_get(opts, "host"); - port = qemu_opt_get(opts, "port"); - - value = host; - if (port) { - /* check for ipv6 */ - if (strchr(host, ':')) { - strbuf = g_strdup_printf("[%s]:%s", host, port); - } else { - strbuf = g_strdup_printf("%s:%s", host, port); - } - value = strbuf; - } else if (strchr(host, ':')) { - strbuf = g_strdup_printf("[%s]", host); - value = strbuf; - } + if (strchr(host, ':')) { + vals[i] = port ? g_strdup_printf("[%s]:%s", host, port) + : g_strdup_printf("[%s]", host); } else { - value = qemu_opt_get(opts, "auth"); + vals[i] = port ? g_strdup_printf("%s:%s", host, port) + : g_strdup(host); } - - - /* each iteration in the for loop will build upon the string, and if - * rados_str is NULL then it is our first pass */ - if (rados_str) { - /* separate options with ';', as that is what rados_conf_set() - * requires */ - rados_str_tmp = rados_str; - rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value); - g_free(rados_str_tmp); - } else { - rados_str = g_strdup(value); - } - - g_free(strbuf); - qemu_opts_del(opts); - opts = NULL; } + vals[i] = NULL; -exit: - qemu_opts_del(opts); + rados_str = i ? 
g_strjoinv(";", (char **)vals) : NULL; +out: + g_strfreev((char **)vals); return rados_str; } @@ -670,32 +540,22 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVRBDState *s = bs->opaque; - const char *pool, *snap, *conf, *clientname, *name, *keypairs; + const char *pool, *snap, *conf, *user, *image_name, *keypairs; const char *secretid; QemuOpts *opts; Error *local_err = NULL; char *mon_host = NULL; - char *auth_supported = NULL; int r; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { error_propagate(errp, local_err); - qemu_opts_del(opts); - return -EINVAL; - } - - auth_supported = qemu_rbd_array_opts(options, "auth-supported.", - RBD_AUTH_SUPPORTED, &local_err); - if (local_err) { - error_propagate(errp, local_err); r = -EINVAL; goto failed_opts; } - mon_host = qemu_rbd_array_opts(options, "server.", - RBD_MON_HOST, &local_err); + mon_host = qemu_rbd_mon_host(options, &local_err); if (local_err) { error_propagate(errp, local_err); r = -EINVAL; @@ -707,20 +567,24 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, pool = qemu_opt_get(opts, "pool"); conf = qemu_opt_get(opts, "conf"); snap = qemu_opt_get(opts, "snapshot"); - clientname = qemu_opt_get(opts, "user"); - name = qemu_opt_get(opts, "image"); - keypairs = qemu_opt_get(opts, "keyvalue-pairs"); + user = qemu_opt_get(opts, "user"); + image_name = qemu_opt_get(opts, "image"); + keypairs = qemu_opt_get(opts, "=keyvalue-pairs"); - r = rados_create(&s->cluster, clientname); + if (!pool || !image_name) { + error_setg(errp, "Parameters 'pool' and 'image' are required"); + r = -EINVAL; + goto failed_opts; + } + + r = rados_create(&s->cluster, user); if (r < 0) { error_setg_errno(errp, -r, "error initializing"); goto failed_opts; } s->snap = g_strdup(snap); - if (name) { - pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name); - } + s->image_name = g_strdup(image_name); 
/* try default location when conf=NULL, but ignore failure */ r = rados_conf_read_file(s->cluster, conf); @@ -741,13 +605,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } } - if (auth_supported) { - r = rados_conf_set(s->cluster, "auth_supported", auth_supported); - if (r < 0) { - goto failed_shutdown; - } - } - if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) { r = -EIO; goto failed_shutdown; @@ -778,13 +635,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_shutdown; } - r = rbd_open(s->io_ctx, s->name, &s->image, s->snap); + /* rbd_open is always r/w */ + r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap); if (r < 0) { - error_setg_errno(errp, -r, "error reading header from %s", s->name); + error_setg_errno(errp, -r, "error reading header from %s", + s->image_name); goto failed_open; } - bs->read_only = (s->snap != NULL); + /* If we are using an rbd snapshot, we must be r/o, otherwise + * leave as-is */ + if (s->snap != NULL) { + r = bdrv_set_read_only(bs, true, &local_err); + if (r < 0) { + error_propagate(errp, local_err); + goto failed_open; + } + } qemu_opts_del(opts); return 0; @@ -794,13 +661,33 @@ failed_open: failed_shutdown: rados_shutdown(s->cluster); g_free(s->snap); + g_free(s->image_name); failed_opts: qemu_opts_del(opts); g_free(mon_host); - g_free(auth_supported); return r; } + +/* Since RBD is currently always opened R/W via the API, + * we just need to check if we are using a snapshot or not, in + * order to determine if we will allow it to be R/W */ +static int qemu_rbd_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + BDRVRBDState *s = state->bs->opaque; + int ret = 0; + + if (s->snap && state->flags & BDRV_O_RDWR) { + error_setg(errp, + "Cannot change node '%s' to r/w when using RBD snapshot", + bdrv_get_device_or_node_name(state->bs)); + ret = -EINVAL; + } + + return ret; +} + static void qemu_rbd_close(BlockDriverState 
*bs) { BDRVRBDState *s = bs->opaque; @@ -808,6 +695,7 @@ static void qemu_rbd_close(BlockDriverState *bs) rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); g_free(s->snap); + g_free(s->image_name); rados_shutdown(s->cluster); } @@ -1206,6 +1094,7 @@ static BlockDriver bdrv_rbd = { .bdrv_parse_filename = qemu_rbd_parse_filename, .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, + .bdrv_reopen_prepare = qemu_rbd_reopen_prepare, .bdrv_create = qemu_rbd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, diff --git a/block/replication.c b/block/replication.c index bf3c395eb4..d300c15475 100644 --- a/block/replication.c +++ b/block/replication.c @@ -656,7 +656,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - NULL, replication_done, bs, errp, true); + NULL, replication_done, bs, true, errp); break; default: aio_context_release(aio_context); diff --git a/block/sheepdog.c b/block/sheepdog.c index 89e98edab6..b2a5998188 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -13,9 +13,11 @@ */ #include "qemu/osdep.h" +#include "qapi-visit.h" #include "qapi/error.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qint.h" +#include "qapi/qobject-input-visitor.h" #include "qemu/uri.h" #include "qemu/error-report.h" #include "qemu/sockets.h" @@ -547,12 +549,53 @@ static SocketAddress *sd_socket_address(const char *path, return addr; } +static SocketAddress *sd_server_config(QDict *options, Error **errp) +{ + QDict *server = NULL; + QObject *crumpled_server = NULL; + Visitor *iv = NULL; + SocketAddressFlat *saddr_flat = NULL; + SocketAddress *saddr = NULL; + Error *local_err = NULL; + + qdict_extract_subqdict(options, &server, "server."); + + crumpled_server = qdict_crumple(server, errp); + if (!crumpled_server) { + goto done; 
+ } + + /* + * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive + * server.type=inet. .to doesn't matter, it's ignored anyway. + * That's because when @options come from -blockdev or + * blockdev_add, members are typed according to the QAPI schema, + * but when they come from -drive, they're all QString. The + * visitor expects the former. + */ + iv = qobject_input_visitor_new(crumpled_server); + visit_type_SocketAddressFlat(iv, NULL, &saddr_flat, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto done; + } + + saddr = socket_address_crumple(saddr_flat); + +done: + qapi_free_SocketAddressFlat(saddr_flat); + visit_free(iv); + qobject_decref(crumpled_server); + QDECREF(server); + return saddr; +} + /* Return -EIO in case of error, file descriptor on success */ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp) { int fd; - fd = socket_connect(s->addr, errp, NULL, NULL); + fd = socket_connect(s->addr, NULL, NULL, errp); if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) { int ret = socket_set_nodelay(fd); @@ -693,7 +736,7 @@ static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr, } else { co = qemu_coroutine_create(do_co_req, &srco); if (bs) { - qemu_coroutine_enter(co); + bdrv_coroutine_enter(bs, co); BDRV_POLL_WHILE(bs, !srco.finished); } else { qemu_coroutine_enter(co); @@ -899,7 +942,7 @@ static void co_read_response(void *opaque) s->co_recv = qemu_coroutine_create(aio_read_response, opaque); } - aio_co_wake(s->co_recv); + aio_co_enter(s->aio_context, s->co_recv); } static void co_write_request(void *opaque) @@ -1174,15 +1217,15 @@ static void sd_parse_filename(const char *filename, QDict *options, return; } - if (cfg.host) { - qdict_set_default_str(options, "host", cfg.host); - } - if (cfg.port) { - snprintf(buf, sizeof(buf), "%d", cfg.port); - qdict_set_default_str(options, "port", buf); - } if (cfg.path) { - qdict_set_default_str(options, "path", cfg.path); + qdict_set_default_str(options, 
"server.path", cfg.path); + qdict_set_default_str(options, "server.type", "unix"); + } else { + qdict_set_default_str(options, "server.type", "inet"); + qdict_set_default_str(options, "server.host", + cfg.host ?: SD_DEFAULT_ADDR); + snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT); + qdict_set_default_str(options, "server.port", buf); } qdict_set_default_str(options, "vdi", cfg.vdi); qdict_set_default_str(options, "tag", cfg.tag); @@ -1510,18 +1553,6 @@ static QemuOptsList runtime_opts = { .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "host", - .type = QEMU_OPT_STRING, - }, - { - .name = "port", - .type = QEMU_OPT_STRING, - }, - { - .name = "path", - .type = QEMU_OPT_STRING, - }, - { .name = "vdi", .type = QEMU_OPT_STRING, }, @@ -1543,7 +1574,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, int ret, fd; uint32_t vid = 0; BDRVSheepdogState *s = bs->opaque; - const char *host, *port, *path, *vdi, *snap_id_str, *tag; + const char *vdi, *snap_id_str, *tag; uint64_t snap_id; char *buf = NULL; QemuOpts *opts; @@ -1560,20 +1591,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, goto err_no_fd; } - host = qemu_opt_get(opts, "host"); - port = qemu_opt_get(opts, "port"); - path = qemu_opt_get(opts, "path"); + s->addr = sd_server_config(options, errp); + if (!s->addr) { + ret = -EINVAL; + goto err_no_fd; + } + vdi = qemu_opt_get(opts, "vdi"); snap_id_str = qemu_opt_get(opts, "snap-id"); snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID); tag = qemu_opt_get(opts, "tag"); - if ((host || port) && path) { - error_setg(errp, "can't use 'path' together with 'host' or 'port'"); - ret = -EINVAL; - goto err_no_fd; - } - if (!vdi) { error_setg(errp, "parameter 'vdi' is missing"); ret = -EINVAL; @@ -1604,8 +1632,6 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, goto err_no_fd; } - s->addr = sd_socket_address(path, host, port); - QLIST_INIT(&s->inflight_aio_head); 
QLIST_INIT(&s->failed_aio_head); QLIST_INIT(&s->inflight_aiocb_head); diff --git a/block/snapshot.c b/block/snapshot.c index bf5c2ca5e1..06b1185d27 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -27,6 +27,7 @@ #include "block/block_int.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" +#include "qapi/qmp/qstring.h" QemuOptsList internal_snapshot_opts = { .name = "snapshot", @@ -189,14 +190,33 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } if (bs->file) { + BlockDriverState *file; + QDict *options = qdict_clone_shallow(bs->options); + QDict *file_options; + + file = bs->file->bs; + /* Prevent it from getting deleted when detached from bs */ + bdrv_ref(file); + + qdict_extract_subqdict(options, &file_options, "file."); + QDECREF(file_options); + qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file))); + drv->bdrv_close(bs); - ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id); - open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL); + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + + ret = bdrv_snapshot_goto(file, snapshot_id); + open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL); + QDECREF(options); if (open_ret < 0) { - bdrv_unref(bs->file->bs); + bdrv_unref(file); bs->drv = NULL; return open_ret; } + + assert(bs->file->bs == file); + bdrv_unref(file); return ret; } diff --git a/block/ssh.c b/block/ssh.c index 278e66faa6..df09f6c5ba 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -601,6 +601,14 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp) goto out; } + /* + * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive. + * .to doesn't matter, it's ignored anyway. + * That's because when @options come from -blockdev or + * blockdev_add, members are typed according to the QAPI schema, + * but when they come from -drive, they're all QString. The + * visitor expects the former. 
+ */ iv = qobject_input_visitor_new(crumpled_addr); visit_type_InetSocketAddress(iv, NULL, &inet, &local_error); if (local_error) { @@ -673,7 +681,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, } /* Open the socket and connect. */ - s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL); + s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp); if (s->sock < 0) { ret = -EIO; goto err; diff --git a/block/trace-events b/block/trace-events index 0bc5c0adf1..9a71c7fb04 100644 --- a/block/trace-events +++ b/block/trace-events @@ -110,3 +110,20 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu" + +# block/vxhs.c +vxhs_iio_callback(int error) "ctx is NULL: error %d" +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d" +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d" +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d" +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. 
Error = %d, errno = %d" +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d" +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64 +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64 +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s" +vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s" +vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState" +vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s" +vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d" +vxhs_close(char *vdisk_guid) "Closing vdisk %s" +vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s" diff --git a/block/vvfat.c b/block/vvfat.c index af5153d27d..b509d55642 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1156,8 +1156,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, s->current_cluster=0xffffffff; - /* read only is the default for safety */ - bs->read_only = true; s->qcow = NULL; s->qcow_filename = NULL; s->fat2 = NULL; @@ -1169,11 +1167,24 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1); if (qemu_opt_get_bool(opts, "rw", false)) { - ret = enable_write_target(bs, errp); + if (!bdrv_is_read_only(bs)) { + ret = enable_write_target(bs, errp); + if (ret < 0) { + goto fail; + } + } else { + ret = -EPERM; + error_setg(errp, + "Unable to set VVFAT to 'rw' when drive is read-only"); + goto fail; + } + } else { + /* read only is the default for safety */ + ret = bdrv_set_read_only(bs, true, &local_err); if (ret < 0) { + error_propagate(errp, local_err); goto fail; } - bs->read_only = false; } bs->total_sectors = cyls * heads * secs; diff --git a/block/vxhs.c 
b/block/vxhs.c new file mode 100644 index 0000000000..9ffe9d3814 --- /dev/null +++ b/block/vxhs.c @@ -0,0 +1,575 @@ +/* + * QEMU Block driver for Veritas HyperScale (VxHS) + * + * Copyright (c) 2017 Veritas Technologies LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include <qnio/qnio_api.h> +#include <sys/param.h> +#include "block/block_int.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "trace.h" +#include "qemu/uri.h" +#include "qapi/error.h" +#include "qemu/uuid.h" +#include "crypto/tlscredsx509.h" + +#define VXHS_OPT_FILENAME "filename" +#define VXHS_OPT_VDISK_ID "vdisk-id" +#define VXHS_OPT_SERVER "server" +#define VXHS_OPT_HOST "host" +#define VXHS_OPT_PORT "port" + +/* Only accessed under QEMU global mutex */ +static uint32_t vxhs_ref; + +typedef enum { + VDISK_AIO_READ, + VDISK_AIO_WRITE, +} VDISKAIOCmd; + +/* + * HyperScale AIO callbacks structure + */ +typedef struct VXHSAIOCB { + BlockAIOCB common; + int err; +} VXHSAIOCB; + +typedef struct VXHSvDiskHostsInfo { + void *dev_handle; /* Device handle */ + char *host; /* Host name or IP */ + int port; /* Host's port number */ +} VXHSvDiskHostsInfo; + +/* + * Structure per vDisk maintained for state + */ +typedef struct BDRVVXHSState { + VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */ + char *vdisk_guid; + char *tlscredsid; /* tlscredsid */ +} BDRVVXHSState; + +static void vxhs_complete_aio_bh(void *opaque) +{ + VXHSAIOCB *acb = opaque; + BlockCompletionFunc *cb = acb->common.cb; + void *cb_opaque = acb->common.opaque; + int ret = 0; + + if (acb->err != 0) { + trace_vxhs_complete_aio(acb, acb->err); + ret = (-EIO); + } + + qemu_aio_unref(acb); + cb(cb_opaque, ret); +} + +/* + * Called from a libqnio thread + */ +static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error) +{ + VXHSAIOCB *acb = NULL; + + 
switch (opcode) { + case IRP_READ_REQUEST: + case IRP_WRITE_REQUEST: + + /* + * ctx is VXHSAIOCB* + * ctx is NULL if error is QNIOERROR_CHANNEL_HUP + */ + if (ctx) { + acb = ctx; + } else { + trace_vxhs_iio_callback(error); + goto out; + } + + if (error) { + if (!acb->err) { + acb->err = error; + } + trace_vxhs_iio_callback(error); + } + + aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), + vxhs_complete_aio_bh, acb); + break; + + default: + if (error == QNIOERROR_HUP) { + /* + * Channel failed, spontaneous notification, + * not in response to I/O + */ + trace_vxhs_iio_callback_chnfail(error, errno); + } else { + trace_vxhs_iio_callback_unknwn(opcode, error); + } + break; + } +out: + return; +} + +static QemuOptsList runtime_opts = { + .name = "vxhs", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = VXHS_OPT_FILENAME, + .type = QEMU_OPT_STRING, + .help = "URI to the Veritas HyperScale image", + }, + { + .name = VXHS_OPT_VDISK_ID, + .type = QEMU_OPT_STRING, + .help = "UUID of the VxHS vdisk", + }, + { + .name = "tls-creds", + .type = QEMU_OPT_STRING, + .help = "ID of the TLS/SSL credentials to use", + }, + { /* end of list */ } + }, +}; + +static QemuOptsList runtime_tcp_opts = { + .name = "vxhs_tcp", + .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head), + .desc = { + { + .name = VXHS_OPT_HOST, + .type = QEMU_OPT_STRING, + .help = "host address (ipv4 addresses)", + }, + { + .name = VXHS_OPT_PORT, + .type = QEMU_OPT_NUMBER, + .help = "port number on which VxHSD is listening (default 9999)", + .def_value_str = "9999" + }, + { /* end of list */ } + }, +}; + +/* + * Parse incoming URI and populate *options with the host + * and device information + */ +static int vxhs_parse_uri(const char *filename, QDict *options) +{ + URI *uri = NULL; + char *port; + int ret = 0; + + trace_vxhs_parse_uri_filename(filename); + uri = uri_parse(filename); + if (!uri || !uri->server || !uri->path) { + uri_free(uri); + return -EINVAL; + } + + 
qdict_put(options, VXHS_OPT_SERVER".host", qstring_from_str(uri->server)); + + if (uri->port) { + port = g_strdup_printf("%d", uri->port); + qdict_put(options, VXHS_OPT_SERVER".port", qstring_from_str(port)); + g_free(port); + } + + qdict_put(options, "vdisk-id", qstring_from_str(uri->path)); + + trace_vxhs_parse_uri_hostinfo(uri->server, uri->port); + uri_free(uri); + + return ret; +} + +static void vxhs_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) { + error_setg(errp, "vdisk-id/server and a file name may not be specified " + "at the same time"); + return; + } + + if (strstr(filename, "://")) { + int ret = vxhs_parse_uri(filename, options); + if (ret < 0) { + error_setg(errp, "Invalid URI. URI should be of the form " + " vxhs://<host_ip>:<port>/<vdisk-id>"); + } + } +} + +static int vxhs_init_and_ref(void) +{ + if (vxhs_ref++ == 0) { + if (iio_init(QNIO_VERSION, vxhs_iio_callback)) { + return -ENODEV; + } + } + return 0; +} + +static void vxhs_unref(void) +{ + if (--vxhs_ref == 0) { + iio_fini(); + } +} + +static void vxhs_get_tls_creds(const char *id, char **cacert, + char **key, char **cert, Error **errp) +{ + Object *obj; + QCryptoTLSCreds *creds; + QCryptoTLSCredsX509 *creds_x509; + + obj = object_resolve_path_component( + object_get_objects_root(), id); + + if (!obj) { + error_setg(errp, "No TLS credentials with id '%s'", + id); + return; + } + + creds_x509 = (QCryptoTLSCredsX509 *) + object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509); + + if (!creds_x509) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + id); + return; + } + + creds = &creds_x509->parent_obj; + + if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + error_setg(errp, + "Expecting TLS credentials with a client endpoint"); + return; + } + + /* + * Get the cacert, client_cert and client_key file names. 
+ */ + if (!creds->dir) { + error_setg(errp, "TLS object missing 'dir' property value"); + return; + } + + *cacert = g_strdup_printf("%s/%s", creds->dir, + QCRYPTO_TLS_CREDS_X509_CA_CERT); + *cert = g_strdup_printf("%s/%s", creds->dir, + QCRYPTO_TLS_CREDS_X509_CLIENT_CERT); + *key = g_strdup_printf("%s/%s", creds->dir, + QCRYPTO_TLS_CREDS_X509_CLIENT_KEY); +} + +static int vxhs_open(BlockDriverState *bs, QDict *options, + int bdrv_flags, Error **errp) +{ + BDRVVXHSState *s = bs->opaque; + void *dev_handlep; + QDict *backing_options = NULL; + QemuOpts *opts = NULL; + QemuOpts *tcp_opts = NULL; + char *of_vsa_addr = NULL; + Error *local_err = NULL; + const char *vdisk_id_opt; + const char *server_host_opt; + int ret = 0; + char *cacert = NULL; + char *client_key = NULL; + char *client_cert = NULL; + + ret = vxhs_init_and_ref(); + if (ret < 0) { + ret = -EINVAL; + goto out; + } + + /* Create opts info from runtime_opts and runtime_tcp_opts list */ + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort); + + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + ret = -EINVAL; + goto out; + } + + /* vdisk-id is the disk UUID */ + vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID); + if (!vdisk_id_opt) { + error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID); + ret = -EINVAL; + goto out; + } + + /* vdisk-id may contain a leading '/' */ + if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) { + error_setg(&local_err, "vdisk-id cannot be more than %d characters", + UUID_FMT_LEN); + ret = -EINVAL; + goto out; + } + + s->vdisk_guid = g_strdup(vdisk_id_opt); + trace_vxhs_open_vdiskid(vdisk_id_opt); + + /* get the 'server.' 
arguments */ + qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER"."); + + qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err); + if (local_err != NULL) { + ret = -EINVAL; + goto out; + } + + server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST); + if (!server_host_opt) { + error_setg(&local_err, QERR_MISSING_PARAMETER, + VXHS_OPT_SERVER"."VXHS_OPT_HOST); + ret = -EINVAL; + goto out; + } + + if (strlen(server_host_opt) > MAXHOSTNAMELEN) { + error_setg(&local_err, "server.host cannot be more than %d characters", + MAXHOSTNAMELEN); + ret = -EINVAL; + goto out; + } + + /* check if we got tls-creds via the --object argument */ + s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds")); + if (s->tlscredsid) { + vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key, + &client_cert, &local_err); + if (local_err != NULL) { + ret = -EINVAL; + goto out; + } + trace_vxhs_get_creds(cacert, client_key, client_cert); + } + + s->vdisk_hostinfo.host = g_strdup(server_host_opt); + s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts, + VXHS_OPT_PORT), + NULL, 0); + + trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host, + s->vdisk_hostinfo.port); + + of_vsa_addr = g_strdup_printf("of://%s:%d", + s->vdisk_hostinfo.host, + s->vdisk_hostinfo.port); + + /* + * Open qnio channel to storage agent if not opened before + */ + dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0, + cacert, client_key, client_cert); + if (dev_handlep == NULL) { + trace_vxhs_open_iio_open(of_vsa_addr); + ret = -ENODEV; + goto out; + } + s->vdisk_hostinfo.dev_handle = dev_handlep; + +out: + g_free(of_vsa_addr); + QDECREF(backing_options); + qemu_opts_del(tcp_opts); + qemu_opts_del(opts); + g_free(cacert); + g_free(client_key); + g_free(client_cert); + + if (ret < 0) { + vxhs_unref(); + error_propagate(errp, local_err); + g_free(s->vdisk_hostinfo.host); + g_free(s->vdisk_guid); + g_free(s->tlscredsid); + s->vdisk_guid = NULL; + } + + return ret; +} + +static const AIOCBInfo 
vxhs_aiocb_info = { + .aiocb_size = sizeof(VXHSAIOCB) +}; + +/* + * This allocates QEMU-VXHS callback for each IO + * and is passed to QNIO. When QNIO completes the work, + * it will be passed back through the callback. + */ +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque, + VDISKAIOCmd iodir) +{ + VXHSAIOCB *acb = NULL; + BDRVVXHSState *s = bs->opaque; + size_t size; + uint64_t offset; + int iio_flags = 0; + int ret = 0; + void *dev_handle = s->vdisk_hostinfo.dev_handle; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque); + + /* + * Initialize VXHSAIOCB. + */ + acb->err = 0; + + iio_flags = IIO_FLAG_ASYNC; + + switch (iodir) { + case VDISK_AIO_WRITE: + ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov, + offset, (uint64_t)size, iio_flags); + break; + case VDISK_AIO_READ: + ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov, + offset, (uint64_t)size, iio_flags); + break; + default: + trace_vxhs_aio_rw_invalid(iodir); + goto errout; + } + + if (ret != 0) { + trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset, + acb, ret, errno); + goto errout; + } + return &acb->common; + +errout: + qemu_aio_unref(acb); + return NULL; +} + +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, + BlockCompletionFunc *cb, void *opaque) +{ + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb, + opaque, VDISK_AIO_READ); +} + +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, + int nb_sectors, + BlockCompletionFunc *cb, void *opaque) +{ + return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, + cb, opaque, VDISK_AIO_WRITE); +} + +static void vxhs_close(BlockDriverState *bs) +{ + BDRVVXHSState *s = bs->opaque; + + trace_vxhs_close(s->vdisk_guid); + + 
g_free(s->vdisk_guid); + s->vdisk_guid = NULL; + + /* + * Close vDisk device + */ + if (s->vdisk_hostinfo.dev_handle) { + iio_close(s->vdisk_hostinfo.dev_handle); + s->vdisk_hostinfo.dev_handle = NULL; + } + + vxhs_unref(); + + /* + * Free the dynamically allocated host string etc + */ + g_free(s->vdisk_hostinfo.host); + g_free(s->tlscredsid); + s->tlscredsid = NULL; + s->vdisk_hostinfo.host = NULL; + s->vdisk_hostinfo.port = 0; +} + +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s) +{ + int64_t vdisk_size = -1; + int ret = 0; + void *dev_handle = s->vdisk_hostinfo.dev_handle; + + ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0); + if (ret < 0) { + trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno); + return -EIO; + } + + trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size); + return vdisk_size; +} + +/* + * Returns the size of vDisk in bytes. This is required + * by QEMU block upper block layer so that it is visible + * to guest. + */ +static int64_t vxhs_getlength(BlockDriverState *bs) +{ + BDRVVXHSState *s = bs->opaque; + int64_t vdisk_size; + + vdisk_size = vxhs_get_vdisk_stat(s); + if (vdisk_size < 0) { + return -EIO; + } + + return vdisk_size; +} + +static BlockDriver bdrv_vxhs = { + .format_name = "vxhs", + .protocol_name = "vxhs", + .instance_size = sizeof(BDRVVXHSState), + .bdrv_file_open = vxhs_open, + .bdrv_parse_filename = vxhs_parse_filename, + .bdrv_close = vxhs_close, + .bdrv_getlength = vxhs_getlength, + .bdrv_aio_readv = vxhs_aio_readv, + .bdrv_aio_writev = vxhs_aio_writev, +}; + +static void bdrv_vxhs_init(void) +{ + bdrv_register(&bdrv_vxhs); +} + +block_init(bdrv_vxhs_init); diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 7ea836b46e..8a11807df3 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -124,6 +124,7 @@ void qmp_nbd_server_start(SocketAddress *addr, goto error; } + /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */ if (addr->type != SOCKET_ADDRESS_KIND_INET) { error_setg(errp, "TLS is only 
supported with IPv4/IPv6"); goto error; diff --git a/blockdev.c b/blockdev.c index c5b2c2c209..64282065d8 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1728,7 +1728,7 @@ static void external_snapshot_prepare(BlkActionState *common, bdrv_img_create(new_image_file, format, state->old_bs->filename, state->old_bs->drv->format_name, - NULL, size, flags, &local_err, false); + NULL, size, flags, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1772,6 +1772,8 @@ static void external_snapshot_prepare(BlkActionState *common, return; } + bdrv_set_aio_context(state->new_bs, state->aio_context); + /* This removes our old bs and adds the new bs. This is an operation that * can fail, so we need to do it in .prepare; undoing it for abort is * always possible. */ @@ -1789,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); - bdrv_set_aio_context(state->new_bs, state->aio_context); - /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -2835,7 +2835,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) bs = bdrv_find_node(id); if (bs) { - qmp_x_blockdev_del(id, &local_err); + qmp_blockdev_del(id, &local_err); if (local_err) { error_report_err(local_err); } @@ -3142,7 +3142,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_active_start(has_job_id ? 
job_id : NULL, bs, base_bs, BLOCK_JOB_DEFAULT, speed, on_error, - filter_node_name, NULL, NULL, &local_err, false); + filter_node_name, NULL, NULL, false, &local_err); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3237,10 +3237,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, if (source) { bdrv_img_create(backup->target, backup->format, source->filename, source->drv->format_name, NULL, - size, flags, &local_err, false); + size, flags, false, &local_err); } else { bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, - size, flags, &local_err, false); + size, flags, false, &local_err); } } @@ -3531,7 +3531,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) /* create new image w/o backing file */ assert(format); bdrv_img_create(arg->target, format, - NULL, NULL, NULL, size, flags, &local_err, false); + NULL, NULL, NULL, size, flags, false, &local_err); } else { switch (arg->mode) { case NEW_IMAGE_MODE_EXISTING: @@ -3541,7 +3541,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) bdrv_img_create(arg->target, format, source->filename, source->drv->format_name, - NULL, size, flags, &local_err, false); + NULL, size, flags, false, &local_err); break; default: abort(); @@ -3900,7 +3900,7 @@ fail: visit_free(v); } -void qmp_x_blockdev_del(const char *node_name, Error **errp) +void qmp_blockdev_del(const char *node_name, Error **errp) { AioContext *aio_context; BlockDriverState *bs; diff --git a/blockjob.c b/blockjob.c index 69126af97f..6e489327ff 100644 --- a/blockjob.c +++ b/blockjob.c @@ -68,6 +68,23 @@ static const BdrvChildRole child_job = { .stay_at_node = true, }; +static void block_job_drained_begin(void *opaque) +{ + BlockJob *job = opaque; + block_job_pause(job); +} + +static void block_job_drained_end(void *opaque) +{ + BlockJob *job = opaque; + block_job_resume(job); +} + +static const BlockDevOps 
block_job_dev_ops = { + .drained_begin = block_job_drained_begin, + .drained_end = block_job_drained_end, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -205,11 +222,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } job = g_malloc0(driver->instance_size); - error_setg(&job->blocker, "block device is in use by block job: %s", - BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); - bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); - job->driver = driver; job->id = g_strdup(job_id); job->blk = blk; @@ -219,8 +231,15 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job->paused = true; job->pause_count = 1; job->refcnt = 1; + + error_setg(&job->blocker, "block device is in use by block job: %s", + BlockJobType_lookup[driver->job_type]); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bs->job = job; + blk_set_dev_ops(blk, &block_job_dev_ops, job); + bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); + QLIST_INSERT_HEAD(&block_jobs, job, job_list); blk_add_aio_context_notifier(blk, block_job_attached_aio_context, @@ -250,16 +269,28 @@ static bool block_job_started(BlockJob *job) return job->co; } +/** + * All jobs must allow a pause point before entering their job proper. This + * ensures that jobs can be paused prior to being started, then resumed later. 
+ */ +static void coroutine_fn block_job_co_entry(void *opaque) +{ + BlockJob *job = opaque; + + assert(job && job->driver && job->driver->start); + block_job_pause_point(job); + job->driver->start(job); +} + void block_job_start(BlockJob *job) { assert(job && !block_job_started(job) && job->paused && - !job->busy && job->driver->start); - job->co = qemu_coroutine_create(job->driver->start, job); - if (--job->pause_count == 0) { - job->paused = false; - job->busy = true; - qemu_coroutine_enter(job->co); - } + job->driver && job->driver->start); + job->co = qemu_coroutine_create(block_job_co_entry, job); + job->pause_count--; + job->busy = true; + job->paused = false; + bdrv_coroutine_enter(blk_bs(job->blk), job->co); } void block_job_ref(BlockJob *job) @@ -501,7 +532,7 @@ void block_job_user_resume(BlockJob *job) void block_job_enter(BlockJob *job) { if (job->co && !job->busy) { - qemu_coroutine_enter(job->co); + bdrv_coroutine_enter(blk_bs(job->blk), job->co); } } @@ -755,12 +786,16 @@ static void block_job_defer_to_main_loop_bh(void *opaque) /* Fetch BDS AioContext again, in case it has changed */ aio_context = blk_get_aio_context(data->job->blk); - aio_context_acquire(aio_context); + if (aio_context != data->aio_context) { + aio_context_acquire(aio_context); + } data->job->deferred_to_main_loop = false; data->fn(data->job, data->opaque); - aio_context_release(aio_context); + if (aio_context != data->aio_context) { + aio_context_release(aio_context); + } aio_context_release(data->aio_context); diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index 1ad018a127..7f2018ede0 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -24,8 +24,7 @@ //#define DEBUG_MMAP -#if defined(CONFIG_USE_NPTL) -pthread_mutex_t mmap_mutex; +static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER; static int __thread mmap_lock_count; void mmap_lock(void) @@ -62,16 +61,6 @@ void mmap_fork_end(int child) else pthread_mutex_unlock(&mmap_mutex); } -#else -/* We aren't threadsafe to start 
with, so no need to worry about locking. */ -void mmap_lock(void) -{ -} - -void mmap_unlock(void) -{ -} -#endif /* NOTE: all the constants are the HOST ones, but addresses are target. */ int target_mprotect(abi_ulong start, abi_ulong len, int prot) diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index 2b2b9184e0..b550cee0cb 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -209,10 +209,8 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, abi_ulong new_addr); int target_msync(abi_ulong start, abi_ulong len, int flags); extern unsigned long last_brk; -#if defined(CONFIG_USE_NPTL) void mmap_fork_start(void); void mmap_fork_end(int child); -#endif /* main.c */ extern unsigned long x86_stack_size; diff --git a/chardev/char-socket.c b/chardev/char-socket.c index d7e92e1bd3..36ab0d633a 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -47,7 +47,6 @@ typedef struct { int max_size; int do_telnetopt; int do_nodelay; - int is_unix; int *read_msgfds; size_t read_msgfds_num; int *write_msgfds; @@ -358,6 +357,10 @@ static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr, return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd.data->str, is_listen ? ",server" : ""); break; + case SOCKET_ADDRESS_KIND_VSOCK: + return g_strdup_printf("%svsock:%s:%s", prefix, + addr->u.vsock.data->cid, + addr->u.vsock.data->port); default: abort(); } @@ -825,7 +828,6 @@ static void qmp_chardev_open_socket(Chardev *chr, int64_t reconnect = sock->has_reconnect ? 
sock->reconnect : 0; QIOChannelSocket *sioc = NULL; - s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX; s->is_listen = is_listen; s->is_telnet = is_telnet; s->do_nodelay = do_nodelay; @@ -865,7 +867,8 @@ static void qmp_chardev_open_socket(Chardev *chr, s->addr = QAPI_CLONE(SocketAddress, sock->addr); qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE); - if (s->is_unix) { + /* TODO SOCKET_ADDRESS_FD where fd has AF_UNIX */ + if (addr->type == SOCKET_ADDRESS_KIND_UNIX) { qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_FD_PASS); } @@ -320,9 +320,11 @@ numa="" tcmalloc="no" jemalloc="no" replication="yes" +vxhs="" supported_cpu="no" supported_os="no" +bogus_os="no" # parse CC options first for opt do @@ -520,11 +522,11 @@ ARCH= # Normalise host CPU name and set ARCH. # Note that this case should only have supported host CPUs, not guests. case "$cpu" in - ppc|ppc64|s390|s390x|x32) + ppc|ppc64|s390|s390x|sparc64|x32) cpu="$cpu" supported_cpu="yes" ;; - ia64|sparc64) + ia64) cpu="$cpu" ;; i386|i486|i586|i686|i86pc|BePC) @@ -549,6 +551,7 @@ case "$cpu" in ;; sparc|sun4[cdmuv]) cpu="sparc" + supported_cpu="yes" ;; *) # This will result in either an error or falling back to TCI later @@ -694,7 +697,10 @@ Linux) supported_os="yes" ;; *) - error_exit "Unsupported host OS $targetos" + # This is a fatal error, but don't report it yet, because we + # might be going to just print the --help text, or it might + # be the result of a missing compiler. + bogus_os="yes" ;; esac @@ -1178,6 +1184,10 @@ for opt do ;; --enable-replication) replication="yes" ;; + --disable-vxhs) vxhs="no" + ;; + --enable-vxhs) vxhs="yes" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1186,21 +1196,6 @@ for opt do esac done -if ! has $python; then - error_exit "Python not found. Use --python=/path/to/python" -fi - -# Note that if the Python conditional here evaluates True we will exit -# with status 1 which is a shell 'false' value. -if ! 
$python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then - error_exit "Cannot use '$python', Python 2.6 or later is required." \ - "Note that Python 3 or later is not yet supported." \ - "Use --python=/path/to/python to specify a supported Python." -fi - -# Suppress writing compiled files -python="$python -B" - case "$cpu" in ppc) CPU_CFLAGS="-m32" @@ -1275,6 +1270,9 @@ for config in $mak_wilds; do default_target_list="${default_target_list} $(basename "$config" .mak)" done +# Enumerate public trace backends for --help output +trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/')) + if test x"$show_help" = x"yes" ; then cat << EOF @@ -1328,7 +1326,7 @@ Advanced options (experts only): set block driver read-only whitelist (affects only QEMU, not qemu-img) --enable-trace-backends=B Set trace backend - Available backends: $($python $source_path/scripts/tracetool.py --list-backends) + Available backends: $trace_backend_list --with-trace-file=NAME Full PATH,NAME of file to store traces Default:trace-<pid> --disable-slirp disable SLIRP userspace network connectivity @@ -1422,12 +1420,28 @@ disabled with --disable-FEATURE, default is enabled if available: xfsctl xfsctl support qom-cast-debug cast debugging support tools build qemu-io, qemu-nbd and qemu-image tools + vxhs Veritas HyperScale vDisk backend support NOTE: The object files are built at the place where configure is launched EOF exit 0 fi +if ! has $python; then + error_exit "Python not found. Use --python=/path/to/python" +fi + +# Note that if the Python conditional here evaluates True we will exit +# with status 1 which is a shell 'false' value. +if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then + error_exit "Cannot use '$python', Python 2.6 or later is required." \ + "Note that Python 3 or later is not yet supported." 
\ + "Use --python=/path/to/python to specify a supported Python." +fi + +# Suppress writing compiled files +python="$python -B" + # Now we have handled --enable-tcg-interpreter and know we're not just # printing the help message, bail out if the host CPU isn't supported. if test "$ARCH" = "unknown"; then @@ -1460,6 +1474,14 @@ if ! compile_prog ; then error_exit "\"$cc\" cannot build an executable (is your linker broken?)" fi +if test "$bogus_os" = "yes"; then + # Now that we know that we're not printing the help and that + # the compiler works (so the results of the check_defines we used + # to identify the OS are reliable), if we didn't recognize the + # host OS we should stop now. + error_exit "Unrecognized host OS $targetos" +fi + # Check that the C++ compiler exists and works with the C compiler if has $cxx; then cat > $TMPC <<EOF @@ -3096,12 +3118,23 @@ fi ########################################## # glib support probe -glib_req_ver=2.22 +if test "$mingw32" = yes; then + glib_req_ver=2.30 +else + glib_req_ver=2.22 +fi glib_modules=gthread-2.0 if test "$modules" = yes; then glib_modules="$glib_modules gmodule-2.0" fi +# This workaround is required due to a bug in pkg-config file for glib as it +# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static + +if test "$static" = yes -a "$mingw32" = yes; then + QEMU_CFLAGS="-DGLIB_STATIC_COMPILATION $QEMU_CFLAGS" +fi + for i in $glib_modules; do if $pkg_config --atleast-version=$glib_req_ver $i; then glib_cflags=$($pkg_config --cflags $i) @@ -4793,6 +4826,33 @@ if compile_prog "" "" ; then fi ########################################## +# Veritas HyperScale block driver VxHS +# Check if libvxhs is installed + +if test "$vxhs" != "no" ; then + cat > $TMPC <<EOF +#include <stdint.h> +#include <qnio/qnio_api.h> + +void *vxhs_callback; + +int main(void) { + iio_init(QNIO_VERSION, vxhs_callback); + return 0; +} +EOF + vxhs_libs="-lvxhs -lssl" + if compile_prog "" "$vxhs_libs" ; then + vxhs=yes + else + if test 
"$vxhs" = "yes" ; then + feature_not_found "vxhs block device" "Install libvxhs See github" + fi + vxhs=no + fi +fi + +########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -5158,6 +5218,7 @@ echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" echo "avx2 optimization $avx2_opt" echo "replication support $replication" +echo "VxHS block device $vxhs" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -5180,8 +5241,8 @@ if test "$supported_os" = "no"; then echo echo "WARNING: SUPPORT FOR THIS HOST OS WILL GO AWAY IN FUTURE RELEASES!" echo - echo "CPU host OS $targetos support is not currently maintained." - echo "The QEMU project intends to remove support for this host CPU in" + echo "Host OS $targetos support is not currently maintained." + echo "The QEMU project intends to remove support for this host OS in" echo "a future release if nobody volunteers to maintain it and to" echo "provide a build host for our continuous integration setup." echo "configure has succeeded and you can continue to build, but" @@ -5797,6 +5858,11 @@ if test "$pthread_setname_np" = "yes" ; then echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak fi +if test "$vxhs" = "yes" ; then + echo "CONFIG_VXHS=y" >> $config_host_mak + echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak +fi + if test "$tcg_interpreter" = "yes"; then QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" elif test "$ARCH" = "sparc64" ; then diff --git a/cpu-exec-common.c b/cpu-exec-common.c index 0504a9457b..e81da276bb 100644 --- a/cpu-exec-common.c +++ b/cpu-exec-common.c @@ -35,7 +35,7 @@ void cpu_loop_exit_noexc(CPUState *cpu) #if defined(CONFIG_SOFTMMU) void cpu_reloading_memory_map(void) { - if (qemu_in_vcpu_thread()) { + if (qemu_in_vcpu_thread() && current_cpu->running) { /* The guest can in theory prolong the RCU critical section as long * as it feels like. 
The major problem with this is that because it * can do multiple reconfigurations of the memory map within the diff --git a/cpu-exec.c b/cpu-exec.c index 748cb66bca..63a56d0407 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -600,13 +600,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, /* Instruction counter expired. */ assert(use_icount); #ifndef CONFIG_USER_ONLY - if (cpu->icount_extra) { - /* Refill decrementer and continue execution. */ - cpu->icount_extra += insns_left; - insns_left = MIN(0xffff, cpu->icount_extra); - cpu->icount_extra -= insns_left; - cpu->icount_decr.u16.low = insns_left; - } else { + /* Ensure global icount has gone forward */ + cpu_update_icount(cpu); + /* Refill decrementer and continue execution. */ + insns_left = MIN(0xffff, cpu->icount_budget); + cpu->icount_decr.u16.low = insns_left; + cpu->icount_extra = cpu->icount_budget - insns_left; + if (!cpu->icount_extra) { /* Execute any remaining instructions, then let the main loop * handle the next event. */ @@ -202,14 +202,14 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp) } else if (use_icount) { error_setg(errp, "No MTTCG when icount is enabled"); } else { -#ifndef TARGET_SUPPORT_MTTCG +#ifndef TARGET_SUPPORTS_MTTCG error_report("Guest not yet converted to MTTCG - " "you may get unexpected results"); #endif if (!check_tcg_memory_orders_compatible()) { error_report("Guest expects a stronger memory ordering " "than the host provides"); - error_printf("This may cause strange/hard to debug errors"); + error_printf("This may cause strange/hard to debug errors\n"); } mttcg_enabled = true; } @@ -223,20 +223,51 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp) } } +/* The current number of executed instructions is based on what we + * originally budgeted minus the current state of the decrementing + * icount counters in extra/u16.low. 
+ */ +static int64_t cpu_get_icount_executed(CPUState *cpu) +{ + return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra); +} + +/* + * Update the global shared timer_state.qemu_icount to take into + * account executed instructions. This is done by the TCG vCPU + * thread so the main-loop can see time has moved forward. + */ +void cpu_update_icount(CPUState *cpu) +{ + int64_t executed = cpu_get_icount_executed(cpu); + cpu->icount_budget -= executed; + +#ifdef CONFIG_ATOMIC64 + atomic_set__nocheck(&timers_state.qemu_icount, + atomic_read__nocheck(&timers_state.qemu_icount) + + executed); +#else /* FIXME: we need 64bit atomics to do this safely */ + timers_state.qemu_icount += executed; +#endif +} + int64_t cpu_get_icount_raw(void) { - int64_t icount; CPUState *cpu = current_cpu; - icount = timers_state.qemu_icount; - if (cpu) { + if (cpu && cpu->running) { if (!cpu->can_do_io) { fprintf(stderr, "Bad icount read\n"); exit(1); } - icount -= (cpu->icount_decr.u16.low + cpu->icount_extra); + /* Take into account what has run */ + cpu_update_icount(cpu); } - return icount; +#ifdef CONFIG_ATOMIC64 + return atomic_read__nocheck(&timers_state.qemu_icount); +#else /* FIXME: we need 64bit atomics to do this safely */ + return timers_state.qemu_icount; +#endif } /* Return the virtual CPU time, based on the instruction counter. */ @@ -1179,6 +1210,41 @@ static void handle_icount_deadline(void) } } +static void prepare_icount_for_run(CPUState *cpu) +{ + if (use_icount) { + int insns_left; + + /* These should always be cleared by process_icount_data after + * each vCPU execution. 
However u16.high can be raised + * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt + */ + g_assert(cpu->icount_decr.u16.low == 0); + g_assert(cpu->icount_extra == 0); + + cpu->icount_budget = tcg_get_icount_limit(); + insns_left = MIN(0xffff, cpu->icount_budget); + cpu->icount_decr.u16.low = insns_left; + cpu->icount_extra = cpu->icount_budget - insns_left; + } +} + +static void process_icount_data(CPUState *cpu) +{ + if (use_icount) { + /* Account for executed instructions */ + cpu_update_icount(cpu); + + /* Reset the counters */ + cpu->icount_decr.u16.low = 0; + cpu->icount_extra = 0; + cpu->icount_budget = 0; + + replay_account_executed_instructions(); + } +} + + static int tcg_cpu_exec(CPUState *cpu) { int ret; @@ -1189,20 +1255,6 @@ static int tcg_cpu_exec(CPUState *cpu) #ifdef CONFIG_PROFILER ti = profile_getclock(); #endif - if (use_icount) { - int64_t count; - int decr; - timers_state.qemu_icount -= (cpu->icount_decr.u16.low - + cpu->icount_extra); - cpu->icount_decr.u16.low = 0; - cpu->icount_extra = 0; - count = tcg_get_icount_limit(); - timers_state.qemu_icount += count; - decr = (count > 0xffff) ? 0xffff : count; - count -= decr; - cpu->icount_decr.u16.low = decr; - cpu->icount_extra = count; - } qemu_mutex_unlock_iothread(); cpu_exec_start(cpu); ret = cpu_exec(cpu); @@ -1211,15 +1263,6 @@ static int tcg_cpu_exec(CPUState *cpu) #ifdef CONFIG_PROFILER tcg_time += profile_getclock() - ti; #endif - if (use_icount) { - /* Fold pending instructions back into the - instruction counter, and clear the interrupt flag. 
*/ - timers_state.qemu_icount -= (cpu->icount_decr.u16.low - + cpu->icount_extra); - cpu->icount_decr.u32 = 0; - cpu->icount_extra = 0; - replay_account_executed_instructions(); - } return ret; } @@ -1306,7 +1349,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) if (cpu_can_run(cpu)) { int r; + + prepare_icount_for_run(cpu); + r = tcg_cpu_exec(cpu); + + process_icount_data(cpu); + if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; @@ -1392,6 +1441,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) { CPUState *cpu = arg; + g_assert(!use_icount); + rcu_register_thread(); qemu_mutex_lock_iothread(); @@ -1434,8 +1485,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) } } - handle_icount_deadline(); - atomic_mb_set(&cpu->exit_request, 0); qemu_tcg_wait_io_event(cpu); } diff --git a/crypto/block-luks.c b/crypto/block-luks.c index 4530f8241c..d5a31bbaeb 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -473,10 +473,10 @@ qcrypto_block_luks_load_key(QCryptoBlock *block, * then encrypted. 
*/ rv = readfunc(block, + opaque, slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, splitkey, splitkeylen, - errp, - opaque); + errp); if (rv < 0) { goto cleanup; } @@ -676,11 +676,10 @@ qcrypto_block_luks_open(QCryptoBlock *block, /* Read the entire LUKS header, minus the key material from * the underlying device */ - rv = readfunc(block, 0, + rv = readfunc(block, opaque, 0, (uint8_t *)&luks->header, sizeof(luks->header), - errp, - opaque); + errp); if (rv < 0) { ret = rv; goto fail; @@ -1246,7 +1245,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE; /* Reserve header space to match payload offset */ - initfunc(block, block->payload_offset, &local_err, opaque); + initfunc(block, opaque, block->payload_offset, &local_err); if (local_err) { error_propagate(errp, local_err); goto error; @@ -1268,11 +1267,10 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Write out the partition header and key slot headers */ - writefunc(block, 0, + writefunc(block, opaque, 0, (const uint8_t *)&luks->header, sizeof(luks->header), - &local_err, - opaque); + &local_err); /* Delay checking local_err until we've byte-swapped */ @@ -1297,12 +1295,11 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Write out the master key material, starting at the * sector immediately following the partition header. 
*/ - if (writefunc(block, + if (writefunc(block, opaque, luks->header.key_slots[0].key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE, splitkey, splitkeylen, - errp, - opaque) != splitkeylen) { + errp) != splitkeylen) { goto error; } diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 1e3bd2b8ca..78d7af03a2 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -29,6 +29,7 @@ CONFIG_LAN9118=y CONFIG_SMC91C111=y CONFIG_ALLWINNER_EMAC=y CONFIG_IMX_FEC=y +CONFIG_FTGMAC100=y CONFIG_DS1338=y CONFIG_PFLASH_CFI01=y CONFIG_PFLASH_CFI02=y diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak index 09c1d45633..1f1cd85b12 100644 --- a/default-configs/ppc-softmmu.mak +++ b/default-configs/ppc-softmmu.mak @@ -45,6 +45,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM)) CONFIG_PLATFORM_BUS=y CONFIG_ETSEC=y CONFIG_LIBDECNUMBER=y +CONFIG_SM501=y # For PReP CONFIG_SERIAL_ISA=y CONFIG_MC146818RTC=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index 05c83356e1..f6ccb1bd86 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -47,6 +47,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM)) CONFIG_PLATFORM_BUS=y CONFIG_ETSEC=y CONFIG_LIBDECNUMBER=y +CONFIG_SM501=y # For pSeries CONFIG_XICS=$(CONFIG_PSERIES) CONFIG_XICS_SPAPR=$(CONFIG_PSERIES) diff --git a/default-configs/ppcemb-softmmu.mak b/default-configs/ppcemb-softmmu.mak index 7f56004cda..94340de356 100644 --- a/default-configs/ppcemb-softmmu.mak +++ b/default-configs/ppcemb-softmmu.mak @@ -15,3 +15,4 @@ CONFIG_I8259=y CONFIG_XILINX=y CONFIG_XILINX_ETHLITE=y CONFIG_LIBDECNUMBER=y +CONFIG_SM501=y diff --git a/disas/cris.c b/disas/cris.c index 30217f17f9..2dd56deea4 100644 --- a/disas/cris.c +++ b/disas/cris.c @@ -2048,7 +2048,7 @@ print_with_operands (const struct cris_opcode *opcodep, { /* We're looking at [pc+], i.e. 
we need to output an immediate number, where the size can depend on different things. */ - long number; + int32_t number; int signedp = ((*cs == 'z' && (insn & 0x20)) || opcodep->match == BDAP_QUICK_OPCODE); @@ -2290,7 +2290,7 @@ print_with_operands (const struct cris_opcode *opcodep, if ((prefix_insn & 0x400) && (prefix_insn & 15) == 15) { - long number; + int32_t number; unsigned int nbytes; /* It's a value. Get its size. */ diff --git a/disas/microblaze.c b/disas/microblaze.c index 407c0a3ffa..7795a0bdb9 100644 --- a/disas/microblaze.c +++ b/disas/microblaze.c @@ -159,7 +159,7 @@ enum microblaze_instr_type { #define MIN_PVR_REGNUM 0 #define MAX_PVR_REGNUM 15 -#define REG_PC 32 /* PC */ +/* 32 is REG_PC */ #define REG_MSR 33 /* machine status reg */ #define REG_EAR 35 /* Exception reg */ #define REG_ESR 37 /* Exception reg */ diff --git a/docs/tracing.txt b/docs/tracing.txt index e14bb6dccc..8c0029beca 100644 --- a/docs/tracing.txt +++ b/docs/tracing.txt @@ -405,6 +405,9 @@ information. If used together with the "tcg" property, it adds a second "TCGv_env" argument that must point to the per-target global TCG register that points to the vCPU when guest code is executed (usually the "cpu_env" variable). +The "tcg" and "vcpu" properties are currently only honored in the root +./trace-events file. 
+ The following example events: foo(uint32_t a) "a=%x" @@ -223,6 +223,12 @@ struct CPUAddressSpace { MemoryListener tcg_as_listener; }; +struct DirtyBitmapSnapshot { + ram_addr_t start; + ram_addr_t end; + unsigned long dirty[]; +}; + #endif #if !defined(CONFIG_USER_ONLY) @@ -1061,6 +1067,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, return dirty; } +DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty + (ram_addr_t start, ram_addr_t length, unsigned client) +{ + DirtyMemoryBlocks *blocks; + unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); + ram_addr_t first = QEMU_ALIGN_DOWN(start, align); + ram_addr_t last = QEMU_ALIGN_UP(start + length, align); + DirtyBitmapSnapshot *snap; + unsigned long page, end, dest; + + snap = g_malloc0(sizeof(*snap) + + ((last - first) >> (TARGET_PAGE_BITS + 3))); + snap->start = first; + snap->end = last; + + page = first >> TARGET_PAGE_BITS; + end = last >> TARGET_PAGE_BITS; + dest = 0; + + rcu_read_lock(); + + blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + + while (page < end) { + unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; + unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE; + unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset); + + assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL))); + assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL))); + offset >>= BITS_PER_LEVEL; + + bitmap_copy_and_clear_atomic(snap->dirty + dest, + blocks->blocks[idx] + offset, + num); + page += num; + dest += num >> BITS_PER_LEVEL; + } + + rcu_read_unlock(); + + if (tcg_enabled()) { + tlb_reset_dirty_range_all(start, length); + } + + return snap; +} + +bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, + ram_addr_t start, + ram_addr_t length) +{ + unsigned long page, end; + + assert(start >= snap->start); + assert(start + length <= snap->end); + + end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS; + page = (start - 
snap->start) >> TARGET_PAGE_BITS; + + while (page < end) { + if (test_bit(page, snap->dirty)) { + return true; + } + page++; + } + return false; +} + /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, @@ -1528,7 +1603,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) return offset; } -ram_addr_t last_ram_offset(void) +unsigned long last_ram_page(void) { RAMBlock *block; ram_addr_t last = 0; @@ -1538,7 +1613,7 @@ ram_addr_t last_ram_offset(void) last = MAX(last, block->offset + block->max_length); } rcu_read_unlock(); - return last; + return last >> TARGET_PAGE_BITS; } static void qemu_ram_setup_dump(void *addr, ram_addr_t size) @@ -1727,7 +1802,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_addr_t old_ram_size, new_ram_size; Error *err = NULL; - old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS; + old_ram_size = last_ram_page(); qemu_mutex_lock_ramlist(); new_block->offset = find_ram_offset(new_block->max_length); @@ -1758,7 +1833,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) new_ram_size = MAX(old_ram_size, (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); if (new_ram_size > old_ram_size) { - migration_bitmap_extend(old_ram_size, new_ram_size); dirty_memory_extend(old_ram_size, new_ram_size); } /* Keep the list sorted from biggest to smallest block. 
Unlike QTAILQ, @@ -3236,75 +3310,33 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, hwaddr len, bool is_write) { - hwaddr l, xlat; - MemoryRegion *mr; - void *ptr; - - assert(len > 0); - - l = len; - mr = address_space_translate(as, addr, &xlat, &l, is_write); - if (!memory_access_is_direct(mr, is_write)) { - return -EINVAL; - } - - l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write); - ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l); - - cache->xlat = xlat; - cache->is_write = is_write; - cache->mr = mr; - cache->ptr = ptr; - cache->len = l; - memory_region_ref(cache->mr); - - return l; + cache->len = len; + cache->as = as; + cache->xlat = addr; + return len; } void address_space_cache_invalidate(MemoryRegionCache *cache, hwaddr addr, hwaddr access_len) { - assert(cache->is_write); - invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len); } void address_space_cache_destroy(MemoryRegionCache *cache) { - if (!cache->mr) { - return; - } - - if (xen_enabled()) { - xen_invalidate_map_cache_entry(cache->ptr); - } - memory_region_unref(cache->mr); - cache->mr = NULL; -} - -/* Called from RCU critical section. This function has the same - * semantics as address_space_translate, but it only works on a - * predefined range of a MemoryRegion that was mapped with - * address_space_cache_init. - */ -static inline MemoryRegion *address_space_translate_cached( - MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat, - hwaddr *plen, bool is_write) -{ - assert(addr < cache->len && *plen <= cache->len - addr); - *xlat = addr + cache->xlat; - return cache->mr; + cache->as = NULL; } #define ARG1_DECL MemoryRegionCache *cache #define ARG1 cache #define SUFFIX _cached -#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__) +#define TRANSLATE(addr, ...) 
\ + address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__) #define IS_DIRECT(mr, is_write) true -#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat)) -#define INVALIDATE(mr, ofs, len) ((void)0) -#define RCU_READ_LOCK() ((void)0) -#define RCU_READ_UNLOCK() ((void)0) +#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs) +#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len) +#define RCU_READ_LOCK() rcu_read_lock() +#define RCU_READ_UNLOCK() rcu_read_unlock() #include "memory_ldst.inc.c" /* virtual memory access for debug (includes writing to ROM) */ @@ -3349,9 +3381,9 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, * Allows code that needs to deal with migration bitmaps etc to still be built * target independent. */ -size_t qemu_target_page_bits(void) +size_t qemu_target_page_size(void) { - return TARGET_PAGE_BITS; + return TARGET_PAGE_SIZE; } #endif @@ -215,6 +215,9 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->ram->normal_bytes >> 10); monitor_printf(mon, "dirty sync count: %" PRIu64 "\n", info->ram->dirty_sync_count); + monitor_printf(mon, "page size: %" PRIu64 " kbytes\n", + info->ram->page_size >> 10); + if (info->ram->dirty_pages_rate) { monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", info->ram->dirty_pages_rate); @@ -265,13 +268,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict) caps = qmp_query_migrate_capabilities(NULL); if (caps) { - monitor_printf(mon, "capabilities: "); for (cap = caps; cap; cap = cap->next) { - monitor_printf(mon, "%s: %s ", + monitor_printf(mon, "%s: %s\n", MigrationCapability_lookup[cap->value->capability], cap->value->state ? 
"on" : "off"); } - monitor_printf(mon, "\n"); } qapi_free_MigrationCapabilityStatusList(caps); @@ -284,46 +285,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) params = qmp_query_migrate_parameters(NULL); if (params) { - monitor_printf(mon, "parameters:"); assert(params->has_compress_level); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL], params->compress_level); assert(params->has_compress_threads); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS], params->compress_threads); assert(params->has_decompress_threads); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); assert(params->has_cpu_throttle_initial); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL], params->cpu_throttle_initial); assert(params->has_cpu_throttle_increment); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT], params->cpu_throttle_increment); - monitor_printf(mon, " %s: '%s'", + monitor_printf(mon, "%s: '%s'\n", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS], params->has_tls_creds ? params->tls_creds : ""); - monitor_printf(mon, " %s: '%s'", + monitor_printf(mon, "%s: '%s'\n", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME], params->has_tls_hostname ? 
params->tls_hostname : ""); assert(params->has_max_bandwidth); - monitor_printf(mon, " %s: %" PRId64 " bytes/second", + monitor_printf(mon, "%s: %" PRId64 " bytes/second\n", MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH], params->max_bandwidth); assert(params->has_downtime_limit); - monitor_printf(mon, " %s: %" PRId64 " milliseconds", + monitor_printf(mon, "%s: %" PRId64 " milliseconds\n", MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT], params->downtime_limit); assert(params->has_x_checkpoint_delay); - monitor_printf(mon, " %s: %" PRId64, + monitor_printf(mon, "%s: %" PRId64 "\n", MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY], params->x_checkpoint_delay); - monitor_printf(mon, "\n"); } qapi_free_MigrationParameters(params); diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 45e9a1f9b0..f3ebca4f7a 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1098,8 +1098,13 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path, { if (dir_path) { v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); - } else { + } else if (strcmp(name, "/")) { v9fs_path_sprintf(target, "%s", name); + } else { + /* We want the path of the export root to be relative, otherwise + * "*at()" syscalls would treat it as "/" in the host. 
+ */ + v9fs_path_sprintf(target, "%s", "."); } return 0; } diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index eec160b3c2..d05c1a1c1d 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -108,6 +108,7 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, g_free(name); close_preserve_errno(dirfd); if (xattr_len < 0) { + g_free(orig_value); return -1; } diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index b8c0b99358..c80ba67389 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -539,14 +539,15 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) /* Free all fids */ while (s->fid_list) { + /* Get fid */ fidp = s->fid_list; + fidp->ref++; + + /* Clunk fid */ s->fid_list = fidp->next; + fidp->clunked = 1; - if (fidp->ref) { - fidp->clunked = 1; - } else { - free_fid(pdu, fidp); - } + put_fid(pdu, fidp); } } @@ -1550,6 +1551,10 @@ static void coroutine_fn v9fs_lcreate(void *opaque) err = -ENOENT; goto out_nofid; } + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } flags = get_dotl_openflags(pdu->s, flags); err = v9fs_co_open2(pdu, fidp, &name, gid, @@ -2153,6 +2158,10 @@ static void coroutine_fn v9fs_create(void *opaque) err = -EINVAL; goto out_nofid; } + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } if (perm & P9_STAT_MODE_DIR) { err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777, fidp->uid, -1, &stbuf); @@ -2379,8 +2388,10 @@ static void coroutine_fn v9fs_flush(void *opaque) * Wait for pdu to complete. 
*/ qemu_co_queue_wait(&cancel_pdu->complete, NULL); - cancel_pdu->cancelled = 0; - pdu_free(cancel_pdu); + if (!qemu_co_queue_next(&cancel_pdu->complete)) { + cancel_pdu->cancelled = 0; + pdu_free(cancel_pdu); + } } pdu_complete(pdu, 7); } diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c index b4adac88cd..05b9d7ba36 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/tco.c @@ -75,8 +75,6 @@ static void tco_timer_expired(void *opaque) if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) { ich9_generate_smi(); - } else { - ich9_generate_nmi(); } tr->tco.rld = tr->tco.tmr; tco_timer_reload(tr); diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c index 7a3ad17d66..a32b847fe0 100644 --- a/hw/acpi/vmgenid.c +++ b/hw/acpi/vmgenid.c @@ -205,9 +205,30 @@ static void vmgenid_handle_reset(void *opaque) memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le)); } +static Property vmgenid_properties[] = { + DEFINE_PROP_BOOL("x-write-pointer-available", VmGenIdState, + write_pointer_available, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void vmgenid_realize(DeviceState *dev, Error **errp) { VmGenIdState *vms = VMGENID(dev); + + if (!vms->write_pointer_available) { + error_setg(errp, "%s requires DMA write support in fw_cfg, " + "which this machine type does not provide", VMGENID_DEVICE); + return; + } + + /* Given that this function is executing, there is at least one VMGENID + * device. Check if there are several. 
+ */ + if (!find_vmgenid_dev()) { + error_setg(errp, "at most one %s device is permitted", VMGENID_DEVICE); + return; + } + qemu_register_reset(vmgenid_handle_reset, vms); } @@ -218,6 +239,7 @@ static void vmgenid_device_class_init(ObjectClass *klass, void *data) dc->vmsd = &vmstate_vmgenid; dc->realize = vmgenid_realize; dc->hotpluggable = false; + dc->props = vmgenid_properties; object_class_property_add_str(klass, VMGENID_GUID, NULL, vmgenid_set_guid, NULL); diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c index ca15d1c8cc..f62a9a3541 100644 --- a/hw/arm/allwinner-a10.c +++ b/hw/arm/allwinner-a10.c @@ -118,12 +118,6 @@ static void aw_a10_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = aw_a10_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo aw_a10_type_info = { diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c index 571e4f097b..4937e2bc83 100644 --- a/hw/arm/aspeed_soc.c +++ b/hw/arm/aspeed_soc.c @@ -19,6 +19,7 @@ #include "hw/char/serial.h" #include "qemu/log.h" #include "hw/i2c/aspeed_i2c.h" +#include "net/net.h" #define ASPEED_SOC_UART_5_BASE 0x00184000 #define ASPEED_SOC_IOMEM_SIZE 0x00200000 @@ -33,6 +34,8 @@ #define ASPEED_SOC_TIMER_BASE 0x1E782000 #define ASPEED_SOC_WDT_BASE 0x1E785000 #define ASPEED_SOC_I2C_BASE 0x1E78A000 +#define ASPEED_SOC_ETH1_BASE 0x1E660000 +#define ASPEED_SOC_ETH2_BASE 0x1E680000 static const int uart_irqs[] = { 9, 32, 33, 34, 10 }; static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, }; @@ -175,6 +178,10 @@ static void aspeed_soc_init(Object *obj) object_initialize(&s->wdt, sizeof(s->wdt), TYPE_ASPEED_WDT); object_property_add_child(obj, "wdt", OBJECT(&s->wdt), NULL); qdev_set_parent_bus(DEVICE(&s->wdt), sysbus_get_default()); + + object_initialize(&s->ftgmac100, sizeof(s->ftgmac100), TYPE_FTGMAC100); + 
object_property_add_child(obj, "ftgmac100", OBJECT(&s->ftgmac100), NULL); + qdev_set_parent_bus(DEVICE(&s->ftgmac100), sysbus_get_default()); } static void aspeed_soc_realize(DeviceState *dev, Error **errp) @@ -299,6 +306,20 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp) return; } sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, ASPEED_SOC_WDT_BASE); + + /* Net */ + qdev_set_nic_properties(DEVICE(&s->ftgmac100), &nd_table[0]); + object_property_set_bool(OBJECT(&s->ftgmac100), true, "aspeed", &err); + object_property_set_bool(OBJECT(&s->ftgmac100), true, "realized", + &local_err); + error_propagate(&err, local_err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->ftgmac100), 0, ASPEED_SOC_ETH1_BASE); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100), 0, + qdev_get_gpio_in(DEVICE(&s->vic), 2)); } static void aspeed_soc_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c index 8451190a19..8c43291112 100644 --- a/hw/arm/bcm2836.c +++ b/hw/arm/bcm2836.c @@ -160,12 +160,6 @@ static void bcm2836_class_init(ObjectClass *oc, void *data) dc->props = bcm2836_props; dc->realize = bcm2836_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo bcm2836_type_info = { diff --git a/hw/arm/boot.c b/hw/arm/boot.c index ff621e4b6a..c2720c8046 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -31,6 +31,9 @@ #define KERNEL_LOAD_ADDR 0x00010000 #define KERNEL64_LOAD_ADDR 0x00080000 +#define ARM64_TEXT_OFFSET_OFFSET 8 +#define ARM64_MAGIC_OFFSET 56 + typedef enum { FIXUP_NONE = 0, /* do nothing */ FIXUP_TERMINATOR, /* end of insns */ @@ -768,6 +771,49 @@ static uint64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry, return ret; } +static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base, + hwaddr *entry) +{ + hwaddr kernel_load_offset = 
KERNEL64_LOAD_ADDR; + uint8_t *buffer; + int size; + + /* On aarch64, it's the bootloader's job to uncompress the kernel. */ + size = load_image_gzipped_buffer(filename, LOAD_IMAGE_MAX_GUNZIP_BYTES, + &buffer); + + if (size < 0) { + gsize len; + + /* Load as raw file otherwise */ + if (!g_file_get_contents(filename, (char **)&buffer, &len, NULL)) { + return -1; + } + size = len; + } + + /* check the arm64 magic header value -- very old kernels may not have it */ + if (memcmp(buffer + ARM64_MAGIC_OFFSET, "ARM\x64", 4) == 0) { + uint64_t hdrvals[2]; + + /* The arm64 Image header has text_offset and image_size fields at 8 and + * 16 bytes into the Image header, respectively. The text_offset field + * is only valid if the image_size is non-zero. + */ + memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals)); + if (hdrvals[1] != 0) { + kernel_load_offset = le64_to_cpu(hdrvals[0]); + } + } + + *entry = mem_base + kernel_load_offset; + rom_add_blob_fixed(filename, buffer, size, *entry); + + g_free(buffer); + + return size; +} + static void arm_load_kernel_notify(Notifier *notifier, void *data) { CPUState *cs; @@ -776,7 +822,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) int is_linux = 0; uint64_t elf_entry, elf_low_addr, elf_high_addr; int elf_machine; - hwaddr entry, kernel_load_offset; + hwaddr entry; static const ARMInsnFixup *primary_loader; ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier, notifier, notifier); @@ -841,14 +887,12 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { primary_loader = bootloader_aarch64; - kernel_load_offset = KERNEL64_LOAD_ADDR; elf_machine = EM_AARCH64; } else { primary_loader = bootloader; if (!info->write_board_setup) { primary_loader += BOOTLOADER_NO_BOARD_SETUP_OFFSET; } - kernel_load_offset = KERNEL_LOAD_ADDR; elf_machine = EM_ARM; } @@ -900,17 +944,15 @@ static void arm_load_kernel_notify(Notifier *notifier, void 
*data) kernel_size = load_uimage(info->kernel_filename, &entry, NULL, &is_linux, NULL, NULL); } - /* On aarch64, it's the bootloader's job to uncompress the kernel. */ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) { - entry = info->loader_start + kernel_load_offset; - kernel_size = load_image_gzipped(info->kernel_filename, entry, - info->ram_size - kernel_load_offset); + kernel_size = load_aarch64_image(info->kernel_filename, + info->loader_start, &entry); is_linux = 1; - } - if (kernel_size < 0) { - entry = info->loader_start + kernel_load_offset; + } else if (kernel_size < 0) { + /* 32-bit ARM */ + entry = info->loader_start + KERNEL_LOAD_ADDR; kernel_size = load_image_targphys(info->kernel_filename, entry, - info->ram_size - kernel_load_offset); + info->ram_size - KERNEL_LOAD_ADDR); is_linux = 1; } if (kernel_size < 0) { diff --git a/hw/arm/digic.c b/hw/arm/digic.c index d60ea395f4..94f32637f0 100644 --- a/hw/arm/digic.c +++ b/hw/arm/digic.c @@ -101,12 +101,6 @@ static void digic_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); dc->realize = digic_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo digic_type_info = { diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index 1d2b50cc4e..960f27e45a 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -32,6 +32,7 @@ #include "hw/arm/arm.h" #include "hw/loader.h" #include "hw/arm/exynos4210.h" +#include "hw/sd/sd.h" #include "hw/usb/hcd-ehci.h" #define EXYNOS4210_CHIPID_ADDR 0x10000000 @@ -72,6 +73,13 @@ #define EXYNOS4210_EXT_COMBINER_BASE_ADDR 0x10440000 #define EXYNOS4210_INT_COMBINER_BASE_ADDR 0x10448000 +/* SD/MMC host controllers */ +#define EXYNOS4210_SDHCI_CAPABILITIES 0x05E80080 +#define EXYNOS4210_SDHCI_BASE_ADDR 0x12510000 +#define EXYNOS4210_SDHCI_ADDR(n) (EXYNOS4210_SDHCI_BASE_ADDR + \ + 0x00010000 * (n)) +#define 
EXYNOS4210_SDHCI_NUMBER 4 + /* PMU SFR base address */ #define EXYNOS4210_PMU_BASE_ADDR 0x10020000 @@ -382,6 +390,27 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem, EXYNOS4210_UART3_FIFO_SIZE, 3, NULL, s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 3)]); + /*** SD/MMC host controllers ***/ + for (n = 0; n < EXYNOS4210_SDHCI_NUMBER; n++) { + DeviceState *carddev; + BlockBackend *blk; + DriveInfo *di; + + dev = qdev_create(NULL, "generic-sdhci"); + qdev_prop_set_uint32(dev, "capareg", EXYNOS4210_SDHCI_CAPABILITIES); + qdev_init_nofail(dev); + + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, EXYNOS4210_SDHCI_ADDR(n)); + sysbus_connect_irq(busdev, 0, s->irq_table[exynos4210_get_irq(29, n)]); + + di = drive_get(IF_SD, 0, n); + blk = di ? blk_by_legacy_dinfo(di) : NULL; + carddev = qdev_create(qdev_get_child_bus(dev, "sd-bus"), TYPE_SD_CARD); + qdev_prop_set_drive(carddev, "drive", blk, &error_abort); + qdev_init_nofail(carddev); + } + /*** Display controller (FIMD) ***/ sysbus_create_varargs("exynos4210.fimd", EXYNOS4210_FIMD0_BASE_ADDR, s->irq_table[exynos4210_get_irq(11, 0)], diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c index 0efa194054..4853c31802 100644 --- a/hw/arm/exynos4_boards.c +++ b/hw/arm/exynos4_boards.c @@ -22,6 +22,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qemu-common.h" #include "cpu.h" #include "sysemu/sysemu.h" @@ -101,9 +102,9 @@ static Exynos4210State *exynos4_boards_init_common(MachineState *machine, MachineClass *mc = MACHINE_GET_CLASS(machine); if (smp_cpus != EXYNOS4210_NCPUS && !qtest_enabled()) { - fprintf(stderr, "%s board supports only %d CPU cores. 
Ignoring smp_cpus" - " value.\n", - mc->name, EXYNOS4210_NCPUS); + error_report("%s board supports only %d CPU cores, ignoring smp_cpus" + " value", + mc->name, EXYNOS4210_NCPUS); } exynos4_board_binfo.ram_size = exynos4_board_ram_size[board_type]; diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 2126f73ca0..9056f27bf8 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -290,11 +290,6 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx25_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX25 SOC"; } diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c index dd1c713ae3..d7e2d832b2 100644 --- a/hw/arm/fsl-imx31.c +++ b/hw/arm/fsl-imx31.c @@ -262,11 +262,6 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx31_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX31 SOC"; } diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index 76dd8a48ca..6969e734ad 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -442,11 +442,6 @@ static void fsl_imx6_class_init(ObjectClass *oc, void *data) dc->realize = fsl_imx6_realize; - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; dc->desc = "i.MX6 SOC"; } diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index cfee3929d9..eea551dc16 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -755,19 +755,18 @@ static void pxa2xx_ssp_reset(DeviceState *d) s->rx_start = s->rx_level = 0; } -static int pxa2xx_ssp_init(SysBusDevice *sbd) +static void pxa2xx_ssp_init(Object *obj) { - DeviceState *dev = DEVICE(sbd); - PXA2xxSSPState *s = PXA2XX_SSP(dev); - + DeviceState *dev = DEVICE(obj); + PXA2xxSSPState *s = PXA2XX_SSP(obj); + 
SysBusDevice *sbd = SYS_BUS_DEVICE(obj); sysbus_init_irq(sbd, &s->irq); - memory_region_init_io(&s->iomem, OBJECT(s), &pxa2xx_ssp_ops, s, + memory_region_init_io(&s->iomem, obj, &pxa2xx_ssp_ops, s, "pxa2xx-ssp", 0x1000); sysbus_init_mmio(sbd, &s->iomem); s->bus = ssi_create_bus(dev, "ssi"); - return 0; } /* Real-Time Clock */ @@ -2321,10 +2320,8 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size) static void pxa2xx_ssp_class_init(ObjectClass *klass, void *data) { - SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - sdc->init = pxa2xx_ssp_init; dc->reset = pxa2xx_ssp_reset; dc->vmsd = &vmstate_pxa2xx_ssp; } @@ -2333,6 +2330,7 @@ static const TypeInfo pxa2xx_ssp_info = { .name = TYPE_PXA2XX_SSP, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(PXA2xxSSPState), + .instance_init = pxa2xx_ssp_init, .class_init = pxa2xx_ssp_class_init, }; diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c index 9edcd49740..ea7a8094e1 100644 --- a/hw/arm/stellaris.c +++ b/hw/arm/stellaris.c @@ -108,7 +108,10 @@ static void gptm_reload(gptm_state *s, int n, int reset) } else if (s->mode[n] == 0xa) { /* PWM mode. Not implemented. */ } else { - hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]); + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); + return; } s->tick[n] = tick; timer_mod(s->timer[n], tick); @@ -149,7 +152,9 @@ static void gptm_tick(void *opaque) } else if (s->mode[n] == 0xa) { /* PWM mode. Not implemented. 
*/ } else { - hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]); + qemu_log_mask(LOG_UNIMP, + "GPTM: 16-bit timer mode unimplemented: 0x%x\n", + s->mode[n]); } gptm_update_irq(s); } @@ -286,7 +291,8 @@ static void gptm_write(void *opaque, hwaddr offset, s->match_prescale[0] = value; break; default: - hw_error("gptm_write: Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "GPTM: read at bad offset 0x%x\n", (int)offset); } gptm_update_irq(s); } @@ -425,7 +431,10 @@ static int ssys_board_class(const ssys_state *s) } /* for unknown classes, fall through */ default: - hw_error("ssys_board_class: Unknown class 0x%08x\n", did0); + /* This can only happen if the hardwired constant did0 value + * in this board's stellaris_board_info struct is wrong. + */ + g_assert_not_reached(); } } @@ -479,8 +488,7 @@ static uint64_t ssys_read(void *opaque, hwaddr offset, case DID0_CLASS_SANDSTORM: return pllcfg_sandstorm[xtal]; default: - hw_error("ssys_read: Unhandled class for PLLCFG read.\n"); - return 0; + g_assert_not_reached(); } } case 0x070: /* RCC2 */ @@ -512,7 +520,8 @@ static uint64_t ssys_read(void *opaque, hwaddr offset, case 0x1e4: /* USER1 */ return s->user1; default: - hw_error("ssys_read: Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "SSYS: read at bad offset 0x%x\n", (int)offset); return 0; } } @@ -614,7 +623,8 @@ static void ssys_write(void *opaque, hwaddr offset, s->ldoarst = value; break; default: - hw_error("ssys_write: Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "SSYS: write at bad offset 0x%x\n", (int)offset); } ssys_update(s); } @@ -748,7 +758,8 @@ static uint64_t stellaris_i2c_read(void *opaque, hwaddr offset, case 0x20: /* MCR */ return s->mcr; default: - hw_error("strllaris_i2c_read: Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "stellaris_i2c: read at bad offset 0x%x\n", (int)offset); return 0; } } @@ -823,17 +834,18 @@ static void stellaris_i2c_write(void *opaque, 
hwaddr offset, s->mris &= ~value; break; case 0x20: /* MCR */ - if (value & 1) - hw_error( - "stellaris_i2c_write: Loopback not implemented\n"); - if (value & 0x20) - hw_error( - "stellaris_i2c_write: Slave mode not implemented\n"); + if (value & 1) { + qemu_log_mask(LOG_UNIMP, "stellaris_i2c: Loopback not implemented"); + } + if (value & 0x20) { + qemu_log_mask(LOG_UNIMP, + "stellaris_i2c: Slave mode not implemented"); + } s->mcr = value & 0x31; break; default: - hw_error("stellaris_i2c_write: Bad offset 0x%x\n", - (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "stellaris_i2c: write at bad offset 0x%x\n", (int)offset); } stellaris_i2c_update(s); } @@ -1057,8 +1069,8 @@ static uint64_t stellaris_adc_read(void *opaque, hwaddr offset, case 0x30: /* SAC */ return s->sac; default: - hw_error("strllaris_adc_read: Bad offset 0x%x\n", - (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "stellaris_adc: read at bad offset 0x%x\n", (int)offset); return 0; } } @@ -1078,8 +1090,9 @@ static void stellaris_adc_write(void *opaque, hwaddr offset, return; case 0x04: /* SSCTL */ if (value != 6) { - hw_error("ADC: Unimplemented sequence %" PRIx64 "\n", - value); + qemu_log_mask(LOG_UNIMP, + "ADC: Unimplemented sequence %" PRIx64 "\n", + value); } s->ssctl[n] = value; return; @@ -1110,13 +1123,14 @@ static void stellaris_adc_write(void *opaque, hwaddr offset, s->sspri = value; break; case 0x28: /* PSSI */ - hw_error("Not implemented: ADC sample initiate\n"); + qemu_log_mask(LOG_UNIMP, "ADC: sample initiate unimplemented"); break; case 0x30: /* SAC */ s->sac = value; break; default: - hw_error("stellaris_adc_write: Bad offset 0x%x\n", (int)offset); + qemu_log_mask(LOG_GUEST_ERROR, + "stellaris_adc: write at bad offset 0x%x\n", (int)offset); } stellaris_adc_update(s); } diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index bc4e66b862..64f52f80a5 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -30,6 +30,8 @@ #define ARM_PHYS_TIMER_PPI 30 #define 
ARM_VIRT_TIMER_PPI 27 +#define GEM_REVISION 0x40070106 + #define GIC_BASE_ADDR 0xf9000000 #define GIC_DIST_ADDR 0xf9010000 #define GIC_CPU_ADDR 0xf9020000 @@ -334,8 +336,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) qemu_check_nic_model(nd, TYPE_CADENCE_GEM); qdev_set_nic_properties(DEVICE(&s->gem[i]), nd); } + object_property_set_int(OBJECT(&s->gem[i]), GEM_REVISION, "revision", + &error_abort); object_property_set_int(OBJECT(&s->gem[i]), 2, "num-priority-queues", - &error_abort); + &error_abort); object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err); if (err) { error_propagate(errp, err); @@ -439,12 +443,6 @@ static void xlnx_zynqmp_class_init(ObjectClass *oc, void *data) dc->props = xlnx_zynqmp_props; dc->realize = xlnx_zynqmp_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo xlnx_zynqmp_type_info = { diff --git a/hw/block/fdc.c b/hw/block/fdc.c index a328693d15..2e629b398b 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -2521,8 +2521,8 @@ static void fdctrl_result_timer(void *opaque) } /* Init functions */ -static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp, - DeviceState *fdc_dev) +static void fdctrl_connect_drives(FDCtrl *fdctrl, DeviceState *fdc_dev, + Error **errp) { unsigned int i; FDrive *drive; @@ -2675,7 +2675,7 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, } floppy_bus_create(fdctrl, &fdctrl->bus, dev); - fdctrl_connect_drives(fdctrl, errp, dev); + fdctrl_connect_drives(fdctrl, dev, errp); } static const MemoryRegionPortio fdc_portio_list[] = { diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c index b75f28d473..bff706ab3a 100644 --- a/hw/char/exynos4210_uart.c +++ b/hw/char/exynos4210_uart.c @@ -102,7 +102,7 @@ typedef struct Exynos4210UartReg { uint32_t reset_value; } Exynos4210UartReg; -static Exynos4210UartReg 
exynos4210_uart_regs[] = { +static const Exynos4210UartReg exynos4210_uart_regs[] = { {"ULCON", ULCON, 0x00000000}, {"UCON", UCON, 0x00003000}, {"UFCON", UFCON, 0x00000000}, @@ -220,7 +220,7 @@ static uint8_t fifo_retrieve(Exynos4210UartFIFO *q) return ret; } -static int fifo_elements_number(Exynos4210UartFIFO *q) +static int fifo_elements_number(const Exynos4210UartFIFO *q) { if (q->sp < q->rp) { return q->size - q->rp + q->sp; @@ -229,7 +229,7 @@ static int fifo_elements_number(Exynos4210UartFIFO *q) return q->sp - q->rp; } -static int fifo_empty_elements_number(Exynos4210UartFIFO *q) +static int fifo_empty_elements_number(const Exynos4210UartFIFO *q) { return q->size - fifo_elements_number(q); } @@ -245,7 +245,7 @@ static void fifo_reset(Exynos4210UartFIFO *q) q->rp = 0; } -static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(Exynos4210UartState *s) +static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState *s) { uint32_t level = 0; uint32_t reg; diff --git a/hw/core/machine.c b/hw/core/machine.c index 0d92672203..ada9eea483 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -396,6 +396,11 @@ static void machine_class_init(ObjectClass *oc, void *data) mc->default_ram_size = 128 * M_BYTE; mc->rom_file_has_mr = true; + /* numa node memory size aligned on 8MB by default. 
+ * On Linux, each node's border has to be 8MB aligned + */ + mc->numa_mem_align_shift = 23; + object_class_property_add_str(oc, "accel", machine_get_accel, machine_set_accel, &error_abort); object_class_property_set_description(oc, "accel", diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c index 27c8369b57..864832db34 100644 --- a/hw/core/null-machine.c +++ b/hw/core/null-machine.c @@ -40,6 +40,12 @@ static void machine_none_init(MachineState *mch) memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size); memory_region_add_subregion(get_system_memory(), 0, ram); } + + if (mch->kernel_filename) { + error_report("The -kernel parameter is not supported " + "(use the generic 'loader' device instead)."); + exit(1); + } } static void machine_none_machine_init(MachineClass *mc) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index c34be1c1ba..79c2014135 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -124,8 +124,12 @@ static void release_drive(Object *obj, const char *name, void *opaque) BlockBackend **ptr = qdev_get_prop_ptr(dev, prop); if (*ptr) { + AioContext *ctx = blk_get_aio_context(*ptr); + + aio_context_acquire(ctx); blockdev_auto_del(*ptr); blk_detach_dev(*ptr, dev); + aio_context_release(ctx); } } @@ -405,7 +409,7 @@ void qdev_prop_set_drive(DeviceState *dev, const char *name, if (value) { ref = blk_name(value); if (!*ref) { - BlockDriverState *bs = blk_bs(value); + const BlockDriverState *bs = blk_bs(value); if (bs) { ref = bdrv_get_node_name(bs); } diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 6ab4265eb4..fa3617db2d 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1010,7 +1010,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value) object_property_set_str(OBJECT(dev), value, name, &error_abort); } -void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value) +void 
qdev_prop_set_macaddr(DeviceState *dev, const char *name, + const uint8_t *value) { char str[2 * 6 + 5 + 1]; snprintf(str, sizeof(str), "%02x:%02x:%02x:%02x:%02x:%02x", diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 1e7fb33246..02b632f6b3 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -39,9 +39,9 @@ #include "qapi-event.h" #include "migration/migration.h" -int qdev_hotplug = 0; +bool qdev_hotplug = false; static bool qdev_hot_added = false; -static bool qdev_hot_removed = false; +bool qdev_hot_removed = false; const VMStateDescription *qdev_get_vmsd(DeviceState *dev) { @@ -271,40 +271,6 @@ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev) return hotplug_ctrl; } -void qdev_unplug(DeviceState *dev, Error **errp) -{ - DeviceClass *dc = DEVICE_GET_CLASS(dev); - HotplugHandler *hotplug_ctrl; - HotplugHandlerClass *hdc; - - if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { - error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); - return; - } - - if (!dc->hotpluggable) { - error_setg(errp, QERR_DEVICE_NO_HOTPLUG, - object_get_typename(OBJECT(dev))); - return; - } - - qdev_hot_removed = true; - - hotplug_ctrl = qdev_get_hotplug_handler(dev); - /* hotpluggable device MUST have HotplugHandler, if it doesn't - * then something is very wrong with it */ - g_assert(hotplug_ctrl); - - /* If device supports async unplug just request it to be done, - * otherwise just remove it synchronously */ - hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); - if (hdc->unplug_request) { - hotplug_handler_unplug_request(hotplug_ctrl, dev, errp); - } else { - hotplug_handler_unplug(hotplug_ctrl, dev, errp); - } -} - static int qdev_reset_one(DeviceState *dev, void *opaque) { device_reset(dev); @@ -385,7 +351,7 @@ void qdev_machine_creation_done(void) * ok, initial machine setup is done, starting from now we can * only create hotpluggable devices */ - qdev_hotplug = 1; + qdev_hotplug = true; } bool qdev_machine_modified(void) @@ -1037,13 +1003,6 @@ static bool 
device_get_hotplugged(Object *obj, Error **err) return dev->hotplugged; } -static void device_set_hotplugged(Object *obj, bool value, Error **err) -{ - DeviceState *dev = DEVICE(obj); - - dev->hotplugged = value; -} - static void device_initfn(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -1063,7 +1022,7 @@ static void device_initfn(Object *obj) object_property_add_bool(obj, "hotpluggable", device_get_hotpluggable, NULL, NULL); object_property_add_bool(obj, "hotplugged", - device_get_hotplugged, device_set_hotplugged, + device_get_hotplugged, NULL, &error_abort); class = object_get_class(OBJECT(dev)); diff --git a/hw/display/cg3.c b/hw/display/cg3.c index 1174220394..03d9197f71 100644 --- a/hw/display/cg3.c +++ b/hw/display/cg3.c @@ -26,7 +26,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" #include "qemu/error-report.h" #include "ui/console.h" #include "hw/sysbus.h" @@ -114,8 +113,8 @@ static void cg3_update_display(void *opaque) for (y = 0; y < height; y++) { int update = s->full_update; - page = (y * width) & TARGET_PAGE_MASK; - update |= memory_region_get_dirty(&s->vram_mem, page, page + width, + page = y * width; + update |= memory_region_get_dirty(&s->vram_mem, page, width, DIRTY_MEMORY_VGA); if (update) { if (y_start < 0) { @@ -148,8 +147,7 @@ static void cg3_update_display(void *opaque) } if (page_max >= page_min) { memory_region_reset_dirty(&s->vram_mem, - page_min, page_max - page_min + TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); + page_min, page_max - page_min, DIRTY_MEMORY_VGA); } /* vsync interrupt? 
*/ if (s->regs[0] & CG3_CR_ENABLE_INTS) { @@ -305,8 +303,7 @@ static void cg3_realizefn(DeviceState *dev, Error **errp) vmstate_register_ram_global(&s->rom); fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, CG3_ROM_FILE); if (fcode_filename) { - ret = load_image_targphys(fcode_filename, s->prom_addr, - FCODE_MAX_ROM_SIZE); + ret = load_image_mr(fcode_filename, &s->rom); g_free(fcode_filename); if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) { error_report("cg3: could not load prom '%s'", CG3_ROM_FILE); @@ -371,7 +368,6 @@ static Property cg3_properties[] = { DEFINE_PROP_UINT16("width", CG3State, width, -1), DEFINE_PROP_UINT16("height", CG3State, height, -1), DEFINE_PROP_UINT16("depth", CG3State, depth, -1), - DEFINE_PROP_UINT64("prom-addr", CG3State, prom_addr, -1), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/cirrus_vga_rop2.h b/hw/display/cirrus_vga_rop2.h index b86bcd6e09..b208b7348a 100644 --- a/hw/display/cirrus_vga_rop2.h +++ b/hw/display/cirrus_vga_rop2.h @@ -29,8 +29,8 @@ #elif DEPTH == 24 #define PUTPIXEL(s, a, c) do { \ ROP_OP(s, a, c); \ - ROP_OP(s, a + 1, (col >> 8)); \ - ROP_OP(s, a + 2, (col >> 16)); \ + ROP_OP(s, a + 1, (c >> 8)); \ + ROP_OP(s, a + 2, (c >> 16)); \ } while (0) #elif DEPTH == 32 #define PUTPIXEL(s, a, c) ROP_OP_32(s, a, c) diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c index e5be713406..fd0b2bec65 100644 --- a/hw/display/exynos4210_fimd.c +++ b/hw/display/exynos4210_fimd.c @@ -1263,6 +1263,7 @@ static void exynos4210_fimd_update(void *opaque) Exynos4210fimdState *s = (Exynos4210fimdState *)opaque; DisplaySurface *surface; Exynos4210fimdWindow *w; + DirtyBitmapSnapshot *snap; int i, line; hwaddr fb_line_addr, inc_size; int scrn_height; @@ -1291,10 +1292,12 @@ static void exynos4210_fimd_update(void *opaque) memory_region_sync_dirty_bitmap(w->mem_section.mr); host_fb_addr = w->host_fb_addr; fb_line_addr = w->mem_section.offset_within_region; + snap = 
memory_region_snapshot_and_clear_dirty(w->mem_section.mr, + fb_line_addr, inc_size * scrn_height, DIRTY_MEMORY_VGA); for (line = 0; line < scrn_height; line++) { - is_dirty = memory_region_get_dirty(w->mem_section.mr, - fb_line_addr, scrn_width, DIRTY_MEMORY_VGA); + is_dirty = memory_region_snapshot_get_dirty(w->mem_section.mr, + snap, fb_line_addr, scrn_width); if (s->invalidate || is_dirty) { if (first_line == -1) { @@ -1309,9 +1312,7 @@ static void exynos4210_fimd_update(void *opaque) fb_line_addr += inc_size; is_dirty = false; } - memory_region_reset_dirty(w->mem_section.mr, - w->mem_section.offset_within_region, - w->fb_len, DIRTY_MEMORY_VGA); + g_free(snap); blend = true; } } diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c index 25aa46c8c7..d7310d25f2 100644 --- a/hw/display/framebuffer.c +++ b/hw/display/framebuffer.c @@ -67,7 +67,7 @@ void framebuffer_update_display( int *first_row, /* Input and output. */ int *last_row /* Output only */) { - hwaddr src_len; + DirtyBitmapSnapshot *snap; uint8_t *dest; uint8_t *src; int first, last = 0; @@ -78,7 +78,6 @@ void framebuffer_update_display( i = *first_row; *first_row = -1; - src_len = (hwaddr)src_width * rows; mem = mem_section->mr; if (!mem) { @@ -102,9 +101,10 @@ void framebuffer_update_display( src += i * src_width; dest += i * dest_row_pitch; + snap = memory_region_snapshot_and_clear_dirty(mem, addr, src_width * rows, + DIRTY_MEMORY_VGA); for (; i < rows; i++) { - dirty = memory_region_get_dirty(mem, addr, src_width, - DIRTY_MEMORY_VGA); + dirty = memory_region_snapshot_get_dirty(mem, snap, addr, src_width); if (dirty || invalidate) { fn(opaque, dest, src, cols, dest_col_pitch); if (first == -1) @@ -115,11 +115,10 @@ void framebuffer_update_display( src += src_width; dest += dest_row_pitch; } + g_free(snap); if (first < 0) { return; } - memory_region_reset_dirty(mem, mem_section->offset_within_region, src_len, - DIRTY_MEMORY_VGA); *first_row = first; *last_row = last; } diff --git 
a/hw/display/g364fb.c b/hw/display/g364fb.c index 8cdc205dd9..86557d14a9 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -64,17 +64,8 @@ typedef struct G364State { static inline int check_dirty(G364State *s, ram_addr_t page) { - return memory_region_get_dirty(&s->mem_vram, page, G364_PAGE_SIZE, - DIRTY_MEMORY_VGA); -} - -static inline void reset_dirty(G364State *s, - ram_addr_t page_min, ram_addr_t page_max) -{ - memory_region_reset_dirty(&s->mem_vram, - page_min, - page_max + G364_PAGE_SIZE - page_min - 1, - DIRTY_MEMORY_VGA); + return memory_region_test_and_clear_dirty(&s->mem_vram, page, G364_PAGE_SIZE, + DIRTY_MEMORY_VGA); } static void g364fb_draw_graphic8(G364State *s) @@ -83,7 +74,7 @@ static void g364fb_draw_graphic8(G364State *s) int i, w; uint8_t *vram; uint8_t *data_display, *dd; - ram_addr_t page, page_min, page_max; + ram_addr_t page; int x, y; int xmin, xmax; int ymin, ymax; @@ -114,8 +105,6 @@ static void g364fb_draw_graphic8(G364State *s) } page = 0; - page_min = (ram_addr_t)-1; - page_max = 0; x = y = 0; xmin = s->width; @@ -137,9 +126,6 @@ static void g364fb_draw_graphic8(G364State *s) if (check_dirty(s, page)) { if (y < ymin) ymin = ymax = y; - if (page_min == (ram_addr_t)-1) - page_min = page; - page_max = page; if (x < xmin) xmin = x; for (i = 0; i < G364_PAGE_SIZE; i++) { @@ -196,10 +182,7 @@ static void g364fb_draw_graphic8(G364State *s) ymax = y; } else { int dy; - if (page_min != (ram_addr_t)-1) { - reset_dirty(s, page_min, page_max); - page_min = (ram_addr_t)-1; - page_max = 0; + if (xmax || ymax) { dpy_gfx_update(s->con, xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); xmin = s->width; @@ -219,9 +202,8 @@ static void g364fb_draw_graphic8(G364State *s) } done: - if (page_min != (ram_addr_t)-1) { + if (xmax || ymax) { dpy_gfx_update(s->con, xmin, ymin, xmax - xmin + 1, ymax - ymin + 1); - reset_dirty(s, page_min, page_max); } } diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 0d02f0efe6..4d94cecd72 100644 --- 
a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -26,6 +26,7 @@ #include "qemu/queue.h" #include "qemu/atomic.h" #include "sysemu/sysemu.h" +#include "migration/migration.h" #include "trace.h" #include "qxl.h" @@ -304,6 +305,16 @@ void qxl_spice_reset_cursor(PCIQXLDevice *qxl) qxl->ssd.cursor = cursor_builtin_hidden(); } +static uint32_t qxl_crc32(const uint8_t *p, unsigned len) +{ + /* + * zlib xors the seed with 0xffffffff, and xors the result + * again with 0xffffffff; Both are not done with linux's crc32, + * which we want to be compatible with, so undo that. + */ + return crc32(0xffffffff, p, len) ^ 0xffffffff; +} + static ram_addr_t qxl_rom_size(void) { #define QXL_REQUIRED_SZ (sizeof(QXLRom) + sizeof(QXLModes) + sizeof(qxl_modes)) @@ -368,6 +379,18 @@ static void init_qxl_rom(PCIQXLDevice *d) rom->num_pages = cpu_to_le32(num_pages); rom->ram_header_offset = cpu_to_le32(d->vga.vram_size - ram_header_size); + if (d->xres && d->yres) { + /* needs linux kernel 4.12+ to work */ + rom->client_monitors_config.count = 1; + rom->client_monitors_config.heads[0].left = 0; + rom->client_monitors_config.heads[0].top = 0; + rom->client_monitors_config.heads[0].right = cpu_to_le32(d->xres); + rom->client_monitors_config.heads[0].bottom = cpu_to_le32(d->yres); + rom->client_monitors_config_crc = qxl_crc32( + (const uint8_t *)&rom->client_monitors_config, + sizeof(rom->client_monitors_config)); + } + d->shadow_rom = *rom; d->rom = rom; d->modes = modes; @@ -639,6 +662,30 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext) qxl->guest_primary.commands++; qxl_track_command(qxl, ext); qxl_log_command(qxl, "cmd", ext); + { + /* + * Windows 8 drivers place qxl commands in the vram + * (instead of the ram) bar. We can't live migrate such a + * guest, so add a migration blocker in case we detect + * this, to avoid triggering the assert in pre_save(). 
+ * + * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa + */ + void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); + if (msg != NULL && ( + msg < (void *)qxl->vga.vram_ptr || + msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) { + if (!qxl->migration_blocker) { + Error *local_err = NULL; + error_setg(&qxl->migration_blocker, + "qxl: guest bug: command not in ram bar"); + migrate_add_blocker(qxl->migration_blocker, &local_err); + if (local_err) { + error_report_err(local_err); + } + } + } + } trace_qxl_ring_command_get(qxl->id, qxl_mode_to_string(qxl->mode)); return true; default: @@ -986,16 +1033,6 @@ static void interface_set_client_capabilities(QXLInstance *sin, qxl_send_events(qxl, QXL_INTERRUPT_CLIENT); } -static uint32_t qxl_crc32(const uint8_t *p, unsigned len) -{ - /* - * zlib xors the seed with 0xffffffff, and xors the result - * again with 0xffffffff; Both are not done with linux's crc32, - * which we want to be compatible with, so undo that. 
- */ - return crc32(0xffffffff, p, len) ^ 0xffffffff; -} - static bool qxl_rom_monitors_config_changed(QXLRom *rom, VDAgentMonitorsConfig *monitors_config, unsigned int max_outputs) @@ -1146,6 +1183,7 @@ static void qxl_enter_vga_mode(PCIQXLDevice *d) update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_DEFAULT); qemu_spice_create_host_primary(&d->ssd); d->mode = QXL_MODE_VGA; + qemu_spice_display_switch(&d->ssd, d->ssd.ds); vga_dirty_log_start(&d->vga); graphic_hw_update(d->vga.con); } @@ -1235,6 +1273,12 @@ static void qxl_hard_reset(PCIQXLDevice *d, int loadvm) qemu_spice_create_host_memslot(&d->ssd); qxl_soft_reset(d); + if (d->migration_blocker) { + migrate_del_blocker(d->migration_blocker); + error_free(d->migration_blocker); + d->migration_blocker = NULL; + } + if (startstop) { qemu_spice_display_start(); } @@ -2365,6 +2409,8 @@ static Property qxl_properties[] = { #if SPICE_SERVER_VERSION >= 0x000c06 /* release 0.12.6 */ DEFINE_PROP_UINT16("max_outputs", PCIQXLDevice, max_outputs, 0), #endif + DEFINE_PROP_UINT32("xres", PCIQXLDevice, xres, 0), + DEFINE_PROP_UINT32("yres", PCIQXLDevice, yres, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/qxl.h b/hw/display/qxl.h index d2d49dd933..f6556adb73 100644 --- a/hw/display/qxl.h +++ b/hw/display/qxl.h @@ -40,6 +40,7 @@ typedef struct PCIQXLDevice { uint32_t cmdlog; uint32_t guest_bug; + Error *migration_blocker; enum qxl_mode mode; uint32_t cmdflags; @@ -118,6 +119,8 @@ typedef struct PCIQXLDevice { uint32_t vram_size_mb; uint32_t vram32_size_mb; uint32_t vgamem_size_mb; + uint32_t xres; + uint32_t yres; /* qxl_render_update state */ int render_update_cookie_num; diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 040a0b93f2..2094adbc9c 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -2,6 +2,7 @@ * QEMU SM501 Device * * Copyright (c) 2008 Shin-ichiro KAWASAKI + * Copyright (c) 2016 BALATON Zoltan * * Permission is hereby granted, free of charge, to any person obtaining a copy 
* of this software and associated documentation files (the "Software"), to deal @@ -23,6 +24,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -31,6 +33,7 @@ #include "ui/console.h" #include "hw/devices.h" #include "hw/sysbus.h" +#include "hw/pci/pci.h" #include "qemu/range.h" #include "ui/pixel_ops.h" #include "exec/address-spaces.h" @@ -38,10 +41,13 @@ /* * Status: 2010/05/07 * - Minimum implementation for Linux console : mmio regs and CRT layer. - * - 2D grapihcs acceleration partially supported : only fill rectangle. + * - 2D graphics acceleration partially supported : only fill rectangle. * - * TODO: + * Status: 2016/12/04 + * - Misc fixes: endianness, hardware cursor * - Panel support + * + * TODO: * - Touch panel support * - USB support * - UART support @@ -49,395 +55,396 @@ * - Performance tuning */ -//#define DEBUG_SM501 -//#define DEBUG_BITBLT +/*#define DEBUG_SM501*/ +/*#define DEBUG_BITBLT*/ #ifdef DEBUG_SM501 #define SM501_DPRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else -#define SM501_DPRINTF(fmt, ...) do {} while(0) +#define SM501_DPRINTF(fmt, ...) 
do {} while (0) #endif - #define MMIO_BASE_OFFSET 0x3e00000 +#define MMIO_SIZE 0x200000 +#define DC_PALETTE_ENTRIES (0x400 * 3) /* SM501 register definitions taken from "linux/include/linux/sm501-regs.h" */ /* System Configuration area */ /* System config base */ -#define SM501_SYS_CONFIG (0x000000) +#define SM501_SYS_CONFIG (0x000000) /* config 1 */ -#define SM501_SYSTEM_CONTROL (0x000000) +#define SM501_SYSTEM_CONTROL (0x000000) -#define SM501_SYSCTRL_PANEL_TRISTATE (1<<0) -#define SM501_SYSCTRL_MEM_TRISTATE (1<<1) -#define SM501_SYSCTRL_CRT_TRISTATE (1<<2) +#define SM501_SYSCTRL_PANEL_TRISTATE (1 << 0) +#define SM501_SYSCTRL_MEM_TRISTATE (1 << 1) +#define SM501_SYSCTRL_CRT_TRISTATE (1 << 2) -#define SM501_SYSCTRL_PCI_SLAVE_BURST_MASK (3<<4) -#define SM501_SYSCTRL_PCI_SLAVE_BURST_1 (0<<4) -#define SM501_SYSCTRL_PCI_SLAVE_BURST_2 (1<<4) -#define SM501_SYSCTRL_PCI_SLAVE_BURST_4 (2<<4) -#define SM501_SYSCTRL_PCI_SLAVE_BURST_8 (3<<4) +#define SM501_SYSCTRL_PCI_SLAVE_BURST_MASK (3 << 4) +#define SM501_SYSCTRL_PCI_SLAVE_BURST_1 (0 << 4) +#define SM501_SYSCTRL_PCI_SLAVE_BURST_2 (1 << 4) +#define SM501_SYSCTRL_PCI_SLAVE_BURST_4 (2 << 4) +#define SM501_SYSCTRL_PCI_SLAVE_BURST_8 (3 << 4) -#define SM501_SYSCTRL_PCI_CLOCK_RUN_EN (1<<6) -#define SM501_SYSCTRL_PCI_RETRY_DISABLE (1<<7) -#define SM501_SYSCTRL_PCI_SUBSYS_LOCK (1<<11) -#define SM501_SYSCTRL_PCI_BURST_READ_EN (1<<15) +#define SM501_SYSCTRL_PCI_CLOCK_RUN_EN (1 << 6) +#define SM501_SYSCTRL_PCI_RETRY_DISABLE (1 << 7) +#define SM501_SYSCTRL_PCI_SUBSYS_LOCK (1 << 11) +#define SM501_SYSCTRL_PCI_BURST_READ_EN (1 << 15) /* miscellaneous control */ -#define SM501_MISC_CONTROL (0x000004) +#define SM501_MISC_CONTROL (0x000004) -#define SM501_MISC_BUS_SH (0x0) -#define SM501_MISC_BUS_PCI (0x1) -#define SM501_MISC_BUS_XSCALE (0x2) -#define SM501_MISC_BUS_NEC (0x6) -#define SM501_MISC_BUS_MASK (0x7) +#define SM501_MISC_BUS_SH (0x0) +#define SM501_MISC_BUS_PCI (0x1) +#define SM501_MISC_BUS_XSCALE (0x2) +#define SM501_MISC_BUS_NEC 
(0x6) +#define SM501_MISC_BUS_MASK (0x7) -#define SM501_MISC_VR_62MB (1<<3) -#define SM501_MISC_CDR_RESET (1<<7) -#define SM501_MISC_USB_LB (1<<8) -#define SM501_MISC_USB_SLAVE (1<<9) -#define SM501_MISC_BL_1 (1<<10) -#define SM501_MISC_MC (1<<11) -#define SM501_MISC_DAC_POWER (1<<12) -#define SM501_MISC_IRQ_INVERT (1<<16) -#define SM501_MISC_SH (1<<17) +#define SM501_MISC_VR_62MB (1 << 3) +#define SM501_MISC_CDR_RESET (1 << 7) +#define SM501_MISC_USB_LB (1 << 8) +#define SM501_MISC_USB_SLAVE (1 << 9) +#define SM501_MISC_BL_1 (1 << 10) +#define SM501_MISC_MC (1 << 11) +#define SM501_MISC_DAC_POWER (1 << 12) +#define SM501_MISC_IRQ_INVERT (1 << 16) +#define SM501_MISC_SH (1 << 17) -#define SM501_MISC_HOLD_EMPTY (0<<18) -#define SM501_MISC_HOLD_8 (1<<18) -#define SM501_MISC_HOLD_16 (2<<18) -#define SM501_MISC_HOLD_24 (3<<18) -#define SM501_MISC_HOLD_32 (4<<18) -#define SM501_MISC_HOLD_MASK (7<<18) +#define SM501_MISC_HOLD_EMPTY (0 << 18) +#define SM501_MISC_HOLD_8 (1 << 18) +#define SM501_MISC_HOLD_16 (2 << 18) +#define SM501_MISC_HOLD_24 (3 << 18) +#define SM501_MISC_HOLD_32 (4 << 18) +#define SM501_MISC_HOLD_MASK (7 << 18) -#define SM501_MISC_FREQ_12 (1<<24) -#define SM501_MISC_PNL_24BIT (1<<25) -#define SM501_MISC_8051_LE (1<<26) +#define SM501_MISC_FREQ_12 (1 << 24) +#define SM501_MISC_PNL_24BIT (1 << 25) +#define SM501_MISC_8051_LE (1 << 26) -#define SM501_GPIO31_0_CONTROL (0x000008) -#define SM501_GPIO63_32_CONTROL (0x00000C) -#define SM501_DRAM_CONTROL (0x000010) +#define SM501_GPIO31_0_CONTROL (0x000008) +#define SM501_GPIO63_32_CONTROL (0x00000C) +#define SM501_DRAM_CONTROL (0x000010) /* command list */ -#define SM501_ARBTRTN_CONTROL (0x000014) +#define SM501_ARBTRTN_CONTROL (0x000014) /* command list */ -#define SM501_COMMAND_LIST_STATUS (0x000024) +#define SM501_COMMAND_LIST_STATUS (0x000024) /* interrupt debug */ -#define SM501_RAW_IRQ_STATUS (0x000028) -#define SM501_RAW_IRQ_CLEAR (0x000028) -#define SM501_IRQ_STATUS (0x00002C) -#define SM501_IRQ_MASK 
(0x000030) -#define SM501_DEBUG_CONTROL (0x000034) +#define SM501_RAW_IRQ_STATUS (0x000028) +#define SM501_RAW_IRQ_CLEAR (0x000028) +#define SM501_IRQ_STATUS (0x00002C) +#define SM501_IRQ_MASK (0x000030) +#define SM501_DEBUG_CONTROL (0x000034) /* power management */ -#define SM501_POWERMODE_P2X_SRC (1<<29) -#define SM501_POWERMODE_V2X_SRC (1<<20) -#define SM501_POWERMODE_M_SRC (1<<12) -#define SM501_POWERMODE_M1_SRC (1<<4) - -#define SM501_CURRENT_GATE (0x000038) -#define SM501_CURRENT_CLOCK (0x00003C) -#define SM501_POWER_MODE_0_GATE (0x000040) -#define SM501_POWER_MODE_0_CLOCK (0x000044) -#define SM501_POWER_MODE_1_GATE (0x000048) -#define SM501_POWER_MODE_1_CLOCK (0x00004C) -#define SM501_SLEEP_MODE_GATE (0x000050) -#define SM501_POWER_MODE_CONTROL (0x000054) +#define SM501_POWERMODE_P2X_SRC (1 << 29) +#define SM501_POWERMODE_V2X_SRC (1 << 20) +#define SM501_POWERMODE_M_SRC (1 << 12) +#define SM501_POWERMODE_M1_SRC (1 << 4) + +#define SM501_CURRENT_GATE (0x000038) +#define SM501_CURRENT_CLOCK (0x00003C) +#define SM501_POWER_MODE_0_GATE (0x000040) +#define SM501_POWER_MODE_0_CLOCK (0x000044) +#define SM501_POWER_MODE_1_GATE (0x000048) +#define SM501_POWER_MODE_1_CLOCK (0x00004C) +#define SM501_SLEEP_MODE_GATE (0x000050) +#define SM501_POWER_MODE_CONTROL (0x000054) /* power gates for units within the 501 */ -#define SM501_GATE_HOST (0) -#define SM501_GATE_MEMORY (1) -#define SM501_GATE_DISPLAY (2) -#define SM501_GATE_2D_ENGINE (3) -#define SM501_GATE_CSC (4) -#define SM501_GATE_ZVPORT (5) -#define SM501_GATE_GPIO (6) -#define SM501_GATE_UART0 (7) -#define SM501_GATE_UART1 (8) -#define SM501_GATE_SSP (10) -#define SM501_GATE_USB_HOST (11) -#define SM501_GATE_USB_GADGET (12) -#define SM501_GATE_UCONTROLLER (17) -#define SM501_GATE_AC97 (18) +#define SM501_GATE_HOST (0) +#define SM501_GATE_MEMORY (1) +#define SM501_GATE_DISPLAY (2) +#define SM501_GATE_2D_ENGINE (3) +#define SM501_GATE_CSC (4) +#define SM501_GATE_ZVPORT (5) +#define SM501_GATE_GPIO (6) +#define 
SM501_GATE_UART0 (7) +#define SM501_GATE_UART1 (8) +#define SM501_GATE_SSP (10) +#define SM501_GATE_USB_HOST (11) +#define SM501_GATE_USB_GADGET (12) +#define SM501_GATE_UCONTROLLER (17) +#define SM501_GATE_AC97 (18) /* panel clock */ -#define SM501_CLOCK_P2XCLK (24) +#define SM501_CLOCK_P2XCLK (24) /* crt clock */ -#define SM501_CLOCK_V2XCLK (16) +#define SM501_CLOCK_V2XCLK (16) /* main clock */ -#define SM501_CLOCK_MCLK (8) +#define SM501_CLOCK_MCLK (8) /* SDRAM controller clock */ -#define SM501_CLOCK_M1XCLK (0) +#define SM501_CLOCK_M1XCLK (0) /* config 2 */ -#define SM501_PCI_MASTER_BASE (0x000058) -#define SM501_ENDIAN_CONTROL (0x00005C) -#define SM501_DEVICEID (0x000060) +#define SM501_PCI_MASTER_BASE (0x000058) +#define SM501_ENDIAN_CONTROL (0x00005C) +#define SM501_DEVICEID (0x000060) /* 0x050100A0 */ -#define SM501_DEVICEID_SM501 (0x05010000) -#define SM501_DEVICEID_IDMASK (0xffff0000) -#define SM501_DEVICEID_REVMASK (0x000000ff) +#define SM501_DEVICEID_SM501 (0x05010000) +#define SM501_DEVICEID_IDMASK (0xffff0000) +#define SM501_DEVICEID_REVMASK (0x000000ff) -#define SM501_PLLCLOCK_COUNT (0x000064) -#define SM501_MISC_TIMING (0x000068) -#define SM501_CURRENT_SDRAM_CLOCK (0x00006C) +#define SM501_PLLCLOCK_COUNT (0x000064) +#define SM501_MISC_TIMING (0x000068) +#define SM501_CURRENT_SDRAM_CLOCK (0x00006C) -#define SM501_PROGRAMMABLE_PLL_CONTROL (0x000074) +#define SM501_PROGRAMMABLE_PLL_CONTROL (0x000074) /* GPIO base */ -#define SM501_GPIO (0x010000) -#define SM501_GPIO_DATA_LOW (0x00) -#define SM501_GPIO_DATA_HIGH (0x04) -#define SM501_GPIO_DDR_LOW (0x08) -#define SM501_GPIO_DDR_HIGH (0x0C) -#define SM501_GPIO_IRQ_SETUP (0x10) -#define SM501_GPIO_IRQ_STATUS (0x14) -#define SM501_GPIO_IRQ_RESET (0x14) +#define SM501_GPIO (0x010000) +#define SM501_GPIO_DATA_LOW (0x00) +#define SM501_GPIO_DATA_HIGH (0x04) +#define SM501_GPIO_DDR_LOW (0x08) +#define SM501_GPIO_DDR_HIGH (0x0C) +#define SM501_GPIO_IRQ_SETUP (0x10) +#define SM501_GPIO_IRQ_STATUS (0x14) +#define 
SM501_GPIO_IRQ_RESET (0x14) /* I2C controller base */ -#define SM501_I2C (0x010040) -#define SM501_I2C_BYTE_COUNT (0x00) -#define SM501_I2C_CONTROL (0x01) -#define SM501_I2C_STATUS (0x02) -#define SM501_I2C_RESET (0x02) -#define SM501_I2C_SLAVE_ADDRESS (0x03) -#define SM501_I2C_DATA (0x04) +#define SM501_I2C (0x010040) +#define SM501_I2C_BYTE_COUNT (0x00) +#define SM501_I2C_CONTROL (0x01) +#define SM501_I2C_STATUS (0x02) +#define SM501_I2C_RESET (0x02) +#define SM501_I2C_SLAVE_ADDRESS (0x03) +#define SM501_I2C_DATA (0x04) /* SSP base */ -#define SM501_SSP (0x020000) +#define SM501_SSP (0x020000) /* Uart 0 base */ -#define SM501_UART0 (0x030000) +#define SM501_UART0 (0x030000) /* Uart 1 base */ -#define SM501_UART1 (0x030020) +#define SM501_UART1 (0x030020) /* USB host port base */ -#define SM501_USB_HOST (0x040000) +#define SM501_USB_HOST (0x040000) /* USB slave/gadget base */ -#define SM501_USB_GADGET (0x060000) +#define SM501_USB_GADGET (0x060000) /* USB slave/gadget data port base */ -#define SM501_USB_GADGET_DATA (0x070000) +#define SM501_USB_GADGET_DATA (0x070000) /* Display controller/video engine base */ -#define SM501_DC (0x080000) +#define SM501_DC (0x080000) /* common defines for the SM501 address registers */ -#define SM501_ADDR_FLIP (1<<31) -#define SM501_ADDR_EXT (1<<27) -#define SM501_ADDR_CS1 (1<<26) -#define SM501_ADDR_MASK (0x3f << 26) +#define SM501_ADDR_FLIP (1 << 31) +#define SM501_ADDR_EXT (1 << 27) +#define SM501_ADDR_CS1 (1 << 26) +#define SM501_ADDR_MASK (0x3f << 26) -#define SM501_FIFO_MASK (0x3 << 16) -#define SM501_FIFO_1 (0x0 << 16) -#define SM501_FIFO_3 (0x1 << 16) -#define SM501_FIFO_7 (0x2 << 16) -#define SM501_FIFO_11 (0x3 << 16) +#define SM501_FIFO_MASK (0x3 << 16) +#define SM501_FIFO_1 (0x0 << 16) +#define SM501_FIFO_3 (0x1 << 16) +#define SM501_FIFO_7 (0x2 << 16) +#define SM501_FIFO_11 (0x3 << 16) /* common registers for panel and the crt */ -#define SM501_OFF_DC_H_TOT (0x000) -#define SM501_OFF_DC_V_TOT (0x008) -#define 
SM501_OFF_DC_H_SYNC (0x004) -#define SM501_OFF_DC_V_SYNC (0x00C) - -#define SM501_DC_PANEL_CONTROL (0x000) - -#define SM501_DC_PANEL_CONTROL_FPEN (1<<27) -#define SM501_DC_PANEL_CONTROL_BIAS (1<<26) -#define SM501_DC_PANEL_CONTROL_DATA (1<<25) -#define SM501_DC_PANEL_CONTROL_VDD (1<<24) -#define SM501_DC_PANEL_CONTROL_DP (1<<23) - -#define SM501_DC_PANEL_CONTROL_TFT_888 (0<<21) -#define SM501_DC_PANEL_CONTROL_TFT_333 (1<<21) -#define SM501_DC_PANEL_CONTROL_TFT_444 (2<<21) - -#define SM501_DC_PANEL_CONTROL_DE (1<<20) - -#define SM501_DC_PANEL_CONTROL_LCD_TFT (0<<18) -#define SM501_DC_PANEL_CONTROL_LCD_STN8 (1<<18) -#define SM501_DC_PANEL_CONTROL_LCD_STN12 (2<<18) - -#define SM501_DC_PANEL_CONTROL_CP (1<<14) -#define SM501_DC_PANEL_CONTROL_VSP (1<<13) -#define SM501_DC_PANEL_CONTROL_HSP (1<<12) -#define SM501_DC_PANEL_CONTROL_CK (1<<9) -#define SM501_DC_PANEL_CONTROL_TE (1<<8) -#define SM501_DC_PANEL_CONTROL_VPD (1<<7) -#define SM501_DC_PANEL_CONTROL_VP (1<<6) -#define SM501_DC_PANEL_CONTROL_HPD (1<<5) -#define SM501_DC_PANEL_CONTROL_HP (1<<4) -#define SM501_DC_PANEL_CONTROL_GAMMA (1<<3) -#define SM501_DC_PANEL_CONTROL_EN (1<<2) - -#define SM501_DC_PANEL_CONTROL_8BPP (0<<0) -#define SM501_DC_PANEL_CONTROL_16BPP (1<<0) -#define SM501_DC_PANEL_CONTROL_32BPP (2<<0) - - -#define SM501_DC_PANEL_PANNING_CONTROL (0x004) -#define SM501_DC_PANEL_COLOR_KEY (0x008) -#define SM501_DC_PANEL_FB_ADDR (0x00C) -#define SM501_DC_PANEL_FB_OFFSET (0x010) -#define SM501_DC_PANEL_FB_WIDTH (0x014) -#define SM501_DC_PANEL_FB_HEIGHT (0x018) -#define SM501_DC_PANEL_TL_LOC (0x01C) -#define SM501_DC_PANEL_BR_LOC (0x020) -#define SM501_DC_PANEL_H_TOT (0x024) -#define SM501_DC_PANEL_H_SYNC (0x028) -#define SM501_DC_PANEL_V_TOT (0x02C) -#define SM501_DC_PANEL_V_SYNC (0x030) -#define SM501_DC_PANEL_CUR_LINE (0x034) - -#define SM501_DC_VIDEO_CONTROL (0x040) -#define SM501_DC_VIDEO_FB0_ADDR (0x044) -#define SM501_DC_VIDEO_FB_WIDTH (0x048) -#define SM501_DC_VIDEO_FB0_LAST_ADDR (0x04C) -#define 
SM501_DC_VIDEO_TL_LOC (0x050) -#define SM501_DC_VIDEO_BR_LOC (0x054) -#define SM501_DC_VIDEO_SCALE (0x058) -#define SM501_DC_VIDEO_INIT_SCALE (0x05C) -#define SM501_DC_VIDEO_YUV_CONSTANTS (0x060) -#define SM501_DC_VIDEO_FB1_ADDR (0x064) -#define SM501_DC_VIDEO_FB1_LAST_ADDR (0x068) - -#define SM501_DC_VIDEO_ALPHA_CONTROL (0x080) -#define SM501_DC_VIDEO_ALPHA_FB_ADDR (0x084) -#define SM501_DC_VIDEO_ALPHA_FB_OFFSET (0x088) -#define SM501_DC_VIDEO_ALPHA_FB_LAST_ADDR (0x08C) -#define SM501_DC_VIDEO_ALPHA_TL_LOC (0x090) -#define SM501_DC_VIDEO_ALPHA_BR_LOC (0x094) -#define SM501_DC_VIDEO_ALPHA_SCALE (0x098) -#define SM501_DC_VIDEO_ALPHA_INIT_SCALE (0x09C) -#define SM501_DC_VIDEO_ALPHA_CHROMA_KEY (0x0A0) -#define SM501_DC_VIDEO_ALPHA_COLOR_LOOKUP (0x0A4) - -#define SM501_DC_PANEL_HWC_BASE (0x0F0) -#define SM501_DC_PANEL_HWC_ADDR (0x0F0) -#define SM501_DC_PANEL_HWC_LOC (0x0F4) -#define SM501_DC_PANEL_HWC_COLOR_1_2 (0x0F8) -#define SM501_DC_PANEL_HWC_COLOR_3 (0x0FC) - -#define SM501_HWC_EN (1<<31) - -#define SM501_OFF_HWC_ADDR (0x00) -#define SM501_OFF_HWC_LOC (0x04) -#define SM501_OFF_HWC_COLOR_1_2 (0x08) -#define SM501_OFF_HWC_COLOR_3 (0x0C) - -#define SM501_DC_ALPHA_CONTROL (0x100) -#define SM501_DC_ALPHA_FB_ADDR (0x104) -#define SM501_DC_ALPHA_FB_OFFSET (0x108) -#define SM501_DC_ALPHA_TL_LOC (0x10C) -#define SM501_DC_ALPHA_BR_LOC (0x110) -#define SM501_DC_ALPHA_CHROMA_KEY (0x114) -#define SM501_DC_ALPHA_COLOR_LOOKUP (0x118) - -#define SM501_DC_CRT_CONTROL (0x200) - -#define SM501_DC_CRT_CONTROL_TVP (1<<15) -#define SM501_DC_CRT_CONTROL_CP (1<<14) -#define SM501_DC_CRT_CONTROL_VSP (1<<13) -#define SM501_DC_CRT_CONTROL_HSP (1<<12) -#define SM501_DC_CRT_CONTROL_VS (1<<11) -#define SM501_DC_CRT_CONTROL_BLANK (1<<10) -#define SM501_DC_CRT_CONTROL_SEL (1<<9) -#define SM501_DC_CRT_CONTROL_TE (1<<8) +#define SM501_OFF_DC_H_TOT (0x000) +#define SM501_OFF_DC_V_TOT (0x008) +#define SM501_OFF_DC_H_SYNC (0x004) +#define SM501_OFF_DC_V_SYNC (0x00C) + +#define SM501_DC_PANEL_CONTROL 
(0x000) + +#define SM501_DC_PANEL_CONTROL_FPEN (1 << 27) +#define SM501_DC_PANEL_CONTROL_BIAS (1 << 26) +#define SM501_DC_PANEL_CONTROL_DATA (1 << 25) +#define SM501_DC_PANEL_CONTROL_VDD (1 << 24) +#define SM501_DC_PANEL_CONTROL_DP (1 << 23) + +#define SM501_DC_PANEL_CONTROL_TFT_888 (0 << 21) +#define SM501_DC_PANEL_CONTROL_TFT_333 (1 << 21) +#define SM501_DC_PANEL_CONTROL_TFT_444 (2 << 21) + +#define SM501_DC_PANEL_CONTROL_DE (1 << 20) + +#define SM501_DC_PANEL_CONTROL_LCD_TFT (0 << 18) +#define SM501_DC_PANEL_CONTROL_LCD_STN8 (1 << 18) +#define SM501_DC_PANEL_CONTROL_LCD_STN12 (2 << 18) + +#define SM501_DC_PANEL_CONTROL_CP (1 << 14) +#define SM501_DC_PANEL_CONTROL_VSP (1 << 13) +#define SM501_DC_PANEL_CONTROL_HSP (1 << 12) +#define SM501_DC_PANEL_CONTROL_CK (1 << 9) +#define SM501_DC_PANEL_CONTROL_TE (1 << 8) +#define SM501_DC_PANEL_CONTROL_VPD (1 << 7) +#define SM501_DC_PANEL_CONTROL_VP (1 << 6) +#define SM501_DC_PANEL_CONTROL_HPD (1 << 5) +#define SM501_DC_PANEL_CONTROL_HP (1 << 4) +#define SM501_DC_PANEL_CONTROL_GAMMA (1 << 3) +#define SM501_DC_PANEL_CONTROL_EN (1 << 2) + +#define SM501_DC_PANEL_CONTROL_8BPP (0 << 0) +#define SM501_DC_PANEL_CONTROL_16BPP (1 << 0) +#define SM501_DC_PANEL_CONTROL_32BPP (2 << 0) + + +#define SM501_DC_PANEL_PANNING_CONTROL (0x004) +#define SM501_DC_PANEL_COLOR_KEY (0x008) +#define SM501_DC_PANEL_FB_ADDR (0x00C) +#define SM501_DC_PANEL_FB_OFFSET (0x010) +#define SM501_DC_PANEL_FB_WIDTH (0x014) +#define SM501_DC_PANEL_FB_HEIGHT (0x018) +#define SM501_DC_PANEL_TL_LOC (0x01C) +#define SM501_DC_PANEL_BR_LOC (0x020) +#define SM501_DC_PANEL_H_TOT (0x024) +#define SM501_DC_PANEL_H_SYNC (0x028) +#define SM501_DC_PANEL_V_TOT (0x02C) +#define SM501_DC_PANEL_V_SYNC (0x030) +#define SM501_DC_PANEL_CUR_LINE (0x034) + +#define SM501_DC_VIDEO_CONTROL (0x040) +#define SM501_DC_VIDEO_FB0_ADDR (0x044) +#define SM501_DC_VIDEO_FB_WIDTH (0x048) +#define SM501_DC_VIDEO_FB0_LAST_ADDR (0x04C) +#define SM501_DC_VIDEO_TL_LOC (0x050) +#define 
SM501_DC_VIDEO_BR_LOC (0x054) +#define SM501_DC_VIDEO_SCALE (0x058) +#define SM501_DC_VIDEO_INIT_SCALE (0x05C) +#define SM501_DC_VIDEO_YUV_CONSTANTS (0x060) +#define SM501_DC_VIDEO_FB1_ADDR (0x064) +#define SM501_DC_VIDEO_FB1_LAST_ADDR (0x068) + +#define SM501_DC_VIDEO_ALPHA_CONTROL (0x080) +#define SM501_DC_VIDEO_ALPHA_FB_ADDR (0x084) +#define SM501_DC_VIDEO_ALPHA_FB_OFFSET (0x088) +#define SM501_DC_VIDEO_ALPHA_FB_LAST_ADDR (0x08C) +#define SM501_DC_VIDEO_ALPHA_TL_LOC (0x090) +#define SM501_DC_VIDEO_ALPHA_BR_LOC (0x094) +#define SM501_DC_VIDEO_ALPHA_SCALE (0x098) +#define SM501_DC_VIDEO_ALPHA_INIT_SCALE (0x09C) +#define SM501_DC_VIDEO_ALPHA_CHROMA_KEY (0x0A0) +#define SM501_DC_VIDEO_ALPHA_COLOR_LOOKUP (0x0A4) + +#define SM501_DC_PANEL_HWC_BASE (0x0F0) +#define SM501_DC_PANEL_HWC_ADDR (0x0F0) +#define SM501_DC_PANEL_HWC_LOC (0x0F4) +#define SM501_DC_PANEL_HWC_COLOR_1_2 (0x0F8) +#define SM501_DC_PANEL_HWC_COLOR_3 (0x0FC) + +#define SM501_HWC_EN (1 << 31) + +#define SM501_OFF_HWC_ADDR (0x00) +#define SM501_OFF_HWC_LOC (0x04) +#define SM501_OFF_HWC_COLOR_1_2 (0x08) +#define SM501_OFF_HWC_COLOR_3 (0x0C) + +#define SM501_DC_ALPHA_CONTROL (0x100) +#define SM501_DC_ALPHA_FB_ADDR (0x104) +#define SM501_DC_ALPHA_FB_OFFSET (0x108) +#define SM501_DC_ALPHA_TL_LOC (0x10C) +#define SM501_DC_ALPHA_BR_LOC (0x110) +#define SM501_DC_ALPHA_CHROMA_KEY (0x114) +#define SM501_DC_ALPHA_COLOR_LOOKUP (0x118) + +#define SM501_DC_CRT_CONTROL (0x200) + +#define SM501_DC_CRT_CONTROL_TVP (1 << 15) +#define SM501_DC_CRT_CONTROL_CP (1 << 14) +#define SM501_DC_CRT_CONTROL_VSP (1 << 13) +#define SM501_DC_CRT_CONTROL_HSP (1 << 12) +#define SM501_DC_CRT_CONTROL_VS (1 << 11) +#define SM501_DC_CRT_CONTROL_BLANK (1 << 10) +#define SM501_DC_CRT_CONTROL_SEL (1 << 9) +#define SM501_DC_CRT_CONTROL_TE (1 << 8) #define SM501_DC_CRT_CONTROL_PIXEL_MASK (0xF << 4) -#define SM501_DC_CRT_CONTROL_GAMMA (1<<3) -#define SM501_DC_CRT_CONTROL_ENABLE (1<<2) +#define SM501_DC_CRT_CONTROL_GAMMA (1 << 3) +#define 
SM501_DC_CRT_CONTROL_ENABLE (1 << 2) -#define SM501_DC_CRT_CONTROL_8BPP (0<<0) -#define SM501_DC_CRT_CONTROL_16BPP (1<<0) -#define SM501_DC_CRT_CONTROL_32BPP (2<<0) +#define SM501_DC_CRT_CONTROL_8BPP (0 << 0) +#define SM501_DC_CRT_CONTROL_16BPP (1 << 0) +#define SM501_DC_CRT_CONTROL_32BPP (2 << 0) -#define SM501_DC_CRT_FB_ADDR (0x204) -#define SM501_DC_CRT_FB_OFFSET (0x208) -#define SM501_DC_CRT_H_TOT (0x20C) -#define SM501_DC_CRT_H_SYNC (0x210) -#define SM501_DC_CRT_V_TOT (0x214) -#define SM501_DC_CRT_V_SYNC (0x218) -#define SM501_DC_CRT_SIGNATURE_ANALYZER (0x21C) -#define SM501_DC_CRT_CUR_LINE (0x220) -#define SM501_DC_CRT_MONITOR_DETECT (0x224) +#define SM501_DC_CRT_FB_ADDR (0x204) +#define SM501_DC_CRT_FB_OFFSET (0x208) +#define SM501_DC_CRT_H_TOT (0x20C) +#define SM501_DC_CRT_H_SYNC (0x210) +#define SM501_DC_CRT_V_TOT (0x214) +#define SM501_DC_CRT_V_SYNC (0x218) +#define SM501_DC_CRT_SIGNATURE_ANALYZER (0x21C) +#define SM501_DC_CRT_CUR_LINE (0x220) +#define SM501_DC_CRT_MONITOR_DETECT (0x224) -#define SM501_DC_CRT_HWC_BASE (0x230) -#define SM501_DC_CRT_HWC_ADDR (0x230) -#define SM501_DC_CRT_HWC_LOC (0x234) -#define SM501_DC_CRT_HWC_COLOR_1_2 (0x238) -#define SM501_DC_CRT_HWC_COLOR_3 (0x23C) +#define SM501_DC_CRT_HWC_BASE (0x230) +#define SM501_DC_CRT_HWC_ADDR (0x230) +#define SM501_DC_CRT_HWC_LOC (0x234) +#define SM501_DC_CRT_HWC_COLOR_1_2 (0x238) +#define SM501_DC_CRT_HWC_COLOR_3 (0x23C) -#define SM501_DC_PANEL_PALETTE (0x400) +#define SM501_DC_PANEL_PALETTE (0x400) -#define SM501_DC_VIDEO_PALETTE (0x800) +#define SM501_DC_VIDEO_PALETTE (0x800) -#define SM501_DC_CRT_PALETTE (0xC00) +#define SM501_DC_CRT_PALETTE (0xC00) /* Zoom Video port base */ -#define SM501_ZVPORT (0x090000) +#define SM501_ZVPORT (0x090000) /* AC97/I2S base */ -#define SM501_AC97 (0x0A0000) +#define SM501_AC97 (0x0A0000) /* 8051 micro controller base */ -#define SM501_UCONTROLLER (0x0B0000) +#define SM501_UCONTROLLER (0x0B0000) /* 8051 micro controller SRAM base */ -#define 
SM501_UCONTROLLER_SRAM (0x0C0000) +#define SM501_UCONTROLLER_SRAM (0x0C0000) /* DMA base */ -#define SM501_DMA (0x0D0000) +#define SM501_DMA (0x0D0000) /* 2d engine base */ -#define SM501_2D_ENGINE (0x100000) -#define SM501_2D_SOURCE (0x00) -#define SM501_2D_DESTINATION (0x04) -#define SM501_2D_DIMENSION (0x08) -#define SM501_2D_CONTROL (0x0C) -#define SM501_2D_PITCH (0x10) -#define SM501_2D_FOREGROUND (0x14) -#define SM501_2D_BACKGROUND (0x18) -#define SM501_2D_STRETCH (0x1C) -#define SM501_2D_COLOR_COMPARE (0x20) -#define SM501_2D_COLOR_COMPARE_MASK (0x24) -#define SM501_2D_MASK (0x28) -#define SM501_2D_CLIP_TL (0x2C) -#define SM501_2D_CLIP_BR (0x30) -#define SM501_2D_MONO_PATTERN_LOW (0x34) -#define SM501_2D_MONO_PATTERN_HIGH (0x38) -#define SM501_2D_WINDOW_WIDTH (0x3C) -#define SM501_2D_SOURCE_BASE (0x40) -#define SM501_2D_DESTINATION_BASE (0x44) -#define SM501_2D_ALPHA (0x48) -#define SM501_2D_WRAP (0x4C) -#define SM501_2D_STATUS (0x50) - -#define SM501_CSC_Y_SOURCE_BASE (0xC8) -#define SM501_CSC_CONSTANTS (0xCC) -#define SM501_CSC_Y_SOURCE_X (0xD0) -#define SM501_CSC_Y_SOURCE_Y (0xD4) -#define SM501_CSC_U_SOURCE_BASE (0xD8) -#define SM501_CSC_V_SOURCE_BASE (0xDC) -#define SM501_CSC_SOURCE_DIMENSION (0xE0) -#define SM501_CSC_SOURCE_PITCH (0xE4) -#define SM501_CSC_DESTINATION (0xE8) -#define SM501_CSC_DESTINATION_DIMENSION (0xEC) -#define SM501_CSC_DESTINATION_PITCH (0xF0) -#define SM501_CSC_SCALE_FACTOR (0xF4) -#define SM501_CSC_DESTINATION_BASE (0xF8) -#define SM501_CSC_CONTROL (0xFC) +#define SM501_2D_ENGINE (0x100000) +#define SM501_2D_SOURCE (0x00) +#define SM501_2D_DESTINATION (0x04) +#define SM501_2D_DIMENSION (0x08) +#define SM501_2D_CONTROL (0x0C) +#define SM501_2D_PITCH (0x10) +#define SM501_2D_FOREGROUND (0x14) +#define SM501_2D_BACKGROUND (0x18) +#define SM501_2D_STRETCH (0x1C) +#define SM501_2D_COLOR_COMPARE (0x20) +#define SM501_2D_COLOR_COMPARE_MASK (0x24) +#define SM501_2D_MASK (0x28) +#define SM501_2D_CLIP_TL (0x2C) +#define SM501_2D_CLIP_BR 
(0x30) +#define SM501_2D_MONO_PATTERN_LOW (0x34) +#define SM501_2D_MONO_PATTERN_HIGH (0x38) +#define SM501_2D_WINDOW_WIDTH (0x3C) +#define SM501_2D_SOURCE_BASE (0x40) +#define SM501_2D_DESTINATION_BASE (0x44) +#define SM501_2D_ALPHA (0x48) +#define SM501_2D_WRAP (0x4C) +#define SM501_2D_STATUS (0x50) + +#define SM501_CSC_Y_SOURCE_BASE (0xC8) +#define SM501_CSC_CONSTANTS (0xCC) +#define SM501_CSC_Y_SOURCE_X (0xD0) +#define SM501_CSC_Y_SOURCE_Y (0xD4) +#define SM501_CSC_U_SOURCE_BASE (0xD8) +#define SM501_CSC_V_SOURCE_BASE (0xDC) +#define SM501_CSC_SOURCE_DIMENSION (0xE0) +#define SM501_CSC_SOURCE_PITCH (0xE4) +#define SM501_CSC_DESTINATION (0xE8) +#define SM501_CSC_DESTINATION_DIMENSION (0xEC) +#define SM501_CSC_DESTINATION_PITCH (0xF0) +#define SM501_CSC_SCALE_FACTOR (0xF4) +#define SM501_CSC_DESTINATION_BASE (0xF8) +#define SM501_CSC_CONTROL (0xFC) /* 2d engine data port base */ -#define SM501_2D_ENGINE_DATA (0x110000) +#define SM501_2D_ENGINE_DATA (0x110000) /* end of register definitions */ @@ -446,12 +453,12 @@ /* SM501 local memory size taken from "linux/drivers/mfd/sm501.c" */ static const uint32_t sm501_mem_local_size[] = { - [0] = 4*1024*1024, - [1] = 8*1024*1024, - [2] = 16*1024*1024, - [3] = 32*1024*1024, - [4] = 64*1024*1024, - [5] = 2*1024*1024, + [0] = 4 * M_BYTE, + [1] = 8 * M_BYTE, + [2] = 16 * M_BYTE, + [3] = 32 * M_BYTE, + [4] = 64 * M_BYTE, + [5] = 2 * M_BYTE, }; #define get_local_mem_size(s) sm501_mem_local_size[(s)->local_mem_size_index] @@ -460,10 +467,13 @@ typedef struct SM501State { QemuConsole *con; /* status & internal resources */ - hwaddr base; uint32_t local_mem_size_index; - uint8_t * local_mem; + uint8_t *local_mem; MemoryRegion local_mem_region; + MemoryRegion mmio_region; + MemoryRegion system_config_region; + MemoryRegion disp_ctrl_region; + MemoryRegion twoD_engine_region; uint32_t last_width; uint32_t last_height; @@ -473,6 +483,7 @@ typedef struct SM501State { uint32_t gpio_31_0_control; uint32_t gpio_63_32_control; uint32_t 
dram_control; + uint32_t arbitration_control; uint32_t irq_mask; uint32_t misc_timing; uint32_t power_mode_control; @@ -482,7 +493,7 @@ typedef struct SM501State { uint32_t uart0_mcr; uint32_t uart0_scr; - uint8_t dc_palette[0x400 * 3]; + uint8_t dc_palette[DC_PALETTE_ENTRIES]; uint32_t dc_panel_control; uint32_t dc_panel_panning_control; @@ -502,6 +513,8 @@ typedef struct SM501State { uint32_t dc_panel_hwc_color_1_2; uint32_t dc_panel_hwc_color_3; + uint32_t dc_video_control; + uint32_t dc_crt_control; uint32_t dc_crt_fb_addr; uint32_t dc_crt_fb_offset; @@ -521,13 +534,20 @@ typedef struct SM501State { uint32_t twoD_control; uint32_t twoD_pitch; uint32_t twoD_foreground; + uint32_t twoD_background; uint32_t twoD_stretch; + uint32_t twoD_color_compare; uint32_t twoD_color_compare_mask; uint32_t twoD_mask; + uint32_t twoD_clip_tl; + uint32_t twoD_clip_br; + uint32_t twoD_mono_pattern_low; + uint32_t twoD_mono_pattern_high; uint32_t twoD_window_width; uint32_t twoD_source_base; uint32_t twoD_destination_base; - + uint32_t twoD_alpha; + uint32_t twoD_wrap; } SM501State; static uint32_t get_local_mem_size_index(uint32_t size) @@ -536,18 +556,36 @@ static uint32_t get_local_mem_size_index(uint32_t size) int i, index = 0; for (i = 0; i < ARRAY_SIZE(sm501_mem_local_size); i++) { - uint32_t new_size = sm501_mem_local_size[i]; - if (new_size >= size) { - if (norm_size == 0 || norm_size > new_size) { - norm_size = new_size; - index = i; - } - } + uint32_t new_size = sm501_mem_local_size[i]; + if (new_size >= size) { + if (norm_size == 0 || norm_size > new_size) { + norm_size = new_size; + index = i; + } + } } return index; } +static inline int get_width(SM501State *s, int crt) +{ + int width = crt ? s->dc_crt_h_total : s->dc_panel_h_total; + return (width & 0x00000FFF) + 1; +} + +static inline int get_height(SM501State *s, int crt) +{ + int height = crt ? 
s->dc_crt_v_total : s->dc_panel_v_total; + return (height & 0x00000FFF) + 1; +} + +static inline int get_bpp(SM501State *s, int crt) +{ + int bpp = crt ? s->dc_crt_control : s->dc_panel_control; + return 1 << (bpp & 3); +} + /** * Check the availability of hardware cursor. * @param crt 0 for PANEL, 1 for CRT. @@ -555,17 +593,17 @@ static uint32_t get_local_mem_size_index(uint32_t size) static inline int is_hwc_enabled(SM501State *state, int crt) { uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr; - return addr & 0x80000000; + return addr & SM501_HWC_EN; } /** * Get the address which holds cursor pattern data. * @param crt 0 for PANEL, 1 for CRT. */ -static inline uint32_t get_hwc_address(SM501State *state, int crt) +static inline uint8_t *get_hwc_address(SM501State *state, int crt) { uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr; - return (addr & 0x03FFFFF0)/* >> 4*/; + return state->local_mem + (addr & 0x03FFFFF0); } /** @@ -591,53 +629,51 @@ static inline uint32_t get_hwc_x(SM501State *state, int crt) } /** - * Get the cursor position in x coordinate. + * Get the hardware cursor palette. * @param crt 0 for PANEL, 1 for CRT. - * @param index 0, 1, 2 or 3 which specifies color of corsor dot. + * @param palette pointer to a [3 * 3] array to store color values in */ -static inline uint16_t get_hwc_color(SM501State *state, int crt, int index) +static inline void get_hwc_palette(SM501State *state, int crt, uint8_t *palette) { - uint32_t color_reg = 0; - uint16_t color_565 = 0; - - if (index == 0) { - return 0; - } - - switch (index) { - case 1: - case 2: - color_reg = crt ? state->dc_crt_hwc_color_1_2 - : state->dc_panel_hwc_color_1_2; - break; - case 3: - color_reg = crt ? 
state->dc_crt_hwc_color_3 - : state->dc_panel_hwc_color_3; - break; - default: - printf("invalid hw cursor color.\n"); - abort(); - } + int i; + uint32_t color_reg; + uint16_t rgb565; + + for (i = 0; i < 3; i++) { + if (i + 1 == 3) { + color_reg = crt ? state->dc_crt_hwc_color_3 + : state->dc_panel_hwc_color_3; + } else { + color_reg = crt ? state->dc_crt_hwc_color_1_2 + : state->dc_panel_hwc_color_1_2; + } - switch (index) { - case 1: - case 3: - color_565 = (uint16_t)(color_reg & 0xFFFF); - break; - case 2: - color_565 = (uint16_t)((color_reg >> 16) & 0xFFFF); - break; + if (i + 1 == 2) { + rgb565 = (color_reg >> 16) & 0xFFFF; + } else { + rgb565 = color_reg & 0xFFFF; + } + palette[i * 3 + 0] = (rgb565 << 3) & 0xf8; /* red */ + palette[i * 3 + 1] = (rgb565 >> 3) & 0xfc; /* green */ + palette[i * 3 + 2] = (rgb565 >> 8) & 0xf8; /* blue */ } - return color_565; } -static int within_hwc_y_range(SM501State *state, int y, int crt) +static inline void hwc_invalidate(SM501State *s, int crt) { - int hwc_y = get_hwc_y(state, crt); - return (hwc_y <= y && y < hwc_y + SM501_HWC_HEIGHT); + int w = get_width(s, crt); + int h = get_height(s, crt); + int bpp = get_bpp(s, crt); + int start = get_hwc_y(s, crt); + int end = MIN(h, start + SM501_HWC_HEIGHT) + 1; + + start *= w * bpp; + end *= w * bpp; + + memory_region_set_dirty(&s->local_mem_region, start, end - start); } -static void sm501_2d_operation(SM501State * s) +static void sm501_2d_operation(SM501State *s) { /* obtain operation parameters */ int operation = (s->twoD_control >> 16) & 0x1f; @@ -653,8 +689,8 @@ static void sm501_2d_operation(SM501State * s) int addressing = (s->twoD_stretch >> 16) & 0xF; /* get frame buffer info */ - uint8_t * src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF); - uint8_t * dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF); + uint8_t *src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF); + uint8_t *dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF); int src_width = 
(s->dc_crt_h_total & 0x00000FFF) + 1; int dst_width = (s->dc_crt_h_total & 0x00000FFF) + 1; @@ -671,20 +707,20 @@ static void sm501_2d_operation(SM501State * s) switch (operation) { case 0x00: /* copy area */ -#define COPY_AREA(_bpp, _pixel_type, rtl) { \ - int y, x, index_d, index_s; \ - for (y = 0; y < operation_height; y++) { \ - for (x = 0; x < operation_width; x++) { \ - if (rtl) { \ - index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \ - index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \ - } else { \ - index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \ - index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ - } \ - *(_pixel_type*)&dst[index_d] = *(_pixel_type*)&src[index_s];\ - } \ - } \ +#define COPY_AREA(_bpp, _pixel_type, rtl) { \ + int y, x, index_d, index_s; \ + for (y = 0; y < operation_height; y++) { \ + for (x = 0; x < operation_width; x++) { \ + if (rtl) { \ + index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \ + index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \ + } else { \ + index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \ + index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ + } \ + *(_pixel_type *)&dst[index_d] = *(_pixel_type *)&src[index_s];\ + } \ + } \ } switch (format_flags) { case 0: @@ -705,7 +741,7 @@ static void sm501_2d_operation(SM501State * s) for (y = 0; y < operation_height; y++) { \ for (x = 0; x < operation_width; x++) { \ int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ - *(_pixel_type*)&dst[index] = (_pixel_type)color; \ + *(_pixel_type *)&dst[index] = (_pixel_type)color; \ } \ } \ } @@ -733,50 +769,53 @@ static void sm501_2d_operation(SM501State * s) static uint64_t sm501_system_config_read(void *opaque, hwaddr addr, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; uint32_t ret = 0; SM501_DPRINTF("sm501 system config regs : read addr=%x\n", (int)addr); - switch(addr) { + switch (addr) { case SM501_SYSTEM_CONTROL: - 
ret = s->system_control; - break; + ret = s->system_control; + break; case SM501_MISC_CONTROL: - ret = s->misc_control; - break; + ret = s->misc_control; + break; case SM501_GPIO31_0_CONTROL: - ret = s->gpio_31_0_control; - break; + ret = s->gpio_31_0_control; + break; case SM501_GPIO63_32_CONTROL: - ret = s->gpio_63_32_control; - break; + ret = s->gpio_63_32_control; + break; case SM501_DEVICEID: - ret = 0x050100A0; - break; + ret = 0x050100A0; + break; case SM501_DRAM_CONTROL: - ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13; - break; + ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13; + break; + case SM501_ARBTRTN_CONTROL: + ret = s->arbitration_control; + break; case SM501_IRQ_MASK: - ret = s->irq_mask; - break; + ret = s->irq_mask; + break; case SM501_MISC_TIMING: - /* TODO : simulate gate control */ - ret = s->misc_timing; - break; + /* TODO : simulate gate control */ + ret = s->misc_timing; + break; case SM501_CURRENT_GATE: - /* TODO : simulate gate control */ - ret = 0x00021807; - break; + /* TODO : simulate gate control */ + ret = 0x00021807; + break; case SM501_CURRENT_CLOCK: - ret = 0x2A1A0A09; - break; + ret = 0x2A1A0A09; + break; case SM501_POWER_MODE_CONTROL: - ret = s->power_mode_control; - break; + ret = s->power_mode_control; + break; default: - printf("sm501 system config : not implemented register read." - " addr=%x\n", (int)addr); + printf("sm501 system config : not implemented register read." 
+ " addr=%x\n", (int)addr); abort(); } @@ -786,47 +825,50 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr, static void sm501_system_config_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; SM501_DPRINTF("sm501 system config regs : write addr=%x, val=%x\n", - (uint32_t)addr, (uint32_t)value); + (uint32_t)addr, (uint32_t)value); - switch(addr) { + switch (addr) { case SM501_SYSTEM_CONTROL: - s->system_control = value & 0xE300B8F7; - break; + s->system_control = value & 0xE300B8F7; + break; case SM501_MISC_CONTROL: - s->misc_control = value & 0xFF7FFF20; - break; + s->misc_control = value & 0xFF7FFF20; + break; case SM501_GPIO31_0_CONTROL: - s->gpio_31_0_control = value; - break; + s->gpio_31_0_control = value; + break; case SM501_GPIO63_32_CONTROL: - s->gpio_63_32_control = value; - break; + s->gpio_63_32_control = value; + break; case SM501_DRAM_CONTROL: - s->local_mem_size_index = (value >> 13) & 0x7; - /* rODO : check validity of size change */ - s->dram_control |= value & 0x7FFFFFC3; - break; + s->local_mem_size_index = (value >> 13) & 0x7; + /* TODO : check validity of size change */ + s->dram_control |= value & 0x7FFFFFC3; + break; + case SM501_ARBTRTN_CONTROL: + s->arbitration_control = value & 0x37777777; + break; case SM501_IRQ_MASK: - s->irq_mask = value; - break; + s->irq_mask = value; + break; case SM501_MISC_TIMING: - s->misc_timing = value & 0xF31F1FFF; - break; + s->misc_timing = value & 0xF31F1FFF; + break; case SM501_POWER_MODE_0_GATE: case SM501_POWER_MODE_1_GATE: case SM501_POWER_MODE_0_CLOCK: case SM501_POWER_MODE_1_CLOCK: - /* TODO : simulate gate & clock control */ - break; + /* TODO : simulate gate & clock control */ + break; case SM501_POWER_MODE_CONTROL: - s->power_mode_control = value & 0x00000003; - break; + s->power_mode_control = value & 0x00000003; + break; default: - printf("sm501 system config : not implemented 
register write." - " addr=%x, val=%x\n", (int)addr, (uint32_t)value); + printf("sm501 system config : not implemented register write." + " addr=%x, val=%x\n", (int)addr, (uint32_t)value); abort(); } } @@ -838,124 +880,128 @@ static const MemoryRegionOps sm501_system_config_ops = { .min_access_size = 4, .max_access_size = 4, }, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, }; static uint32_t sm501_palette_read(void *opaque, hwaddr addr) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; SM501_DPRINTF("sm501 palette read addr=%x\n", (int)addr); /* TODO : consider BYTE/WORD access */ /* TODO : consider endian */ assert(range_covers_byte(0, 0x400 * 3, addr)); - return *(uint32_t*)&s->dc_palette[addr]; + return *(uint32_t *)&s->dc_palette[addr]; } -static void sm501_palette_write(void *opaque, - hwaddr addr, uint32_t value) +static void sm501_palette_write(void *opaque, hwaddr addr, + uint32_t value) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; SM501_DPRINTF("sm501 palette write addr=%x, val=%x\n", - (int)addr, value); + (int)addr, value); /* TODO : consider BYTE/WORD access */ /* TODO : consider endian */ assert(range_covers_byte(0, 0x400 * 3, addr)); - *(uint32_t*)&s->dc_palette[addr] = value; + *(uint32_t *)&s->dc_palette[addr] = value; } static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; uint32_t ret = 0; SM501_DPRINTF("sm501 disp ctrl regs : read addr=%x\n", (int)addr); - switch(addr) { + switch (addr) { case SM501_DC_PANEL_CONTROL: - ret = s->dc_panel_control; - break; + ret = s->dc_panel_control; + break; case SM501_DC_PANEL_PANNING_CONTROL: - ret = s->dc_panel_panning_control; - break; + ret = s->dc_panel_panning_control; + break; case SM501_DC_PANEL_FB_ADDR: - ret = s->dc_panel_fb_addr; - break; + ret = s->dc_panel_fb_addr; + break; case 
SM501_DC_PANEL_FB_OFFSET: - ret = s->dc_panel_fb_offset; - break; + ret = s->dc_panel_fb_offset; + break; case SM501_DC_PANEL_FB_WIDTH: - ret = s->dc_panel_fb_width; - break; + ret = s->dc_panel_fb_width; + break; case SM501_DC_PANEL_FB_HEIGHT: - ret = s->dc_panel_fb_height; - break; + ret = s->dc_panel_fb_height; + break; case SM501_DC_PANEL_TL_LOC: - ret = s->dc_panel_tl_location; - break; + ret = s->dc_panel_tl_location; + break; case SM501_DC_PANEL_BR_LOC: - ret = s->dc_panel_br_location; - break; + ret = s->dc_panel_br_location; + break; case SM501_DC_PANEL_H_TOT: - ret = s->dc_panel_h_total; - break; + ret = s->dc_panel_h_total; + break; case SM501_DC_PANEL_H_SYNC: - ret = s->dc_panel_h_sync; - break; + ret = s->dc_panel_h_sync; + break; case SM501_DC_PANEL_V_TOT: - ret = s->dc_panel_v_total; - break; + ret = s->dc_panel_v_total; + break; case SM501_DC_PANEL_V_SYNC: - ret = s->dc_panel_v_sync; - break; + ret = s->dc_panel_v_sync; + break; + + case SM501_DC_VIDEO_CONTROL: + ret = s->dc_video_control; + break; case SM501_DC_CRT_CONTROL: - ret = s->dc_crt_control; - break; + ret = s->dc_crt_control; + break; case SM501_DC_CRT_FB_ADDR: - ret = s->dc_crt_fb_addr; - break; + ret = s->dc_crt_fb_addr; + break; case SM501_DC_CRT_FB_OFFSET: - ret = s->dc_crt_fb_offset; - break; + ret = s->dc_crt_fb_offset; + break; case SM501_DC_CRT_H_TOT: - ret = s->dc_crt_h_total; - break; + ret = s->dc_crt_h_total; + break; case SM501_DC_CRT_H_SYNC: - ret = s->dc_crt_h_sync; - break; + ret = s->dc_crt_h_sync; + break; case SM501_DC_CRT_V_TOT: - ret = s->dc_crt_v_total; - break; + ret = s->dc_crt_v_total; + break; case SM501_DC_CRT_V_SYNC: - ret = s->dc_crt_v_sync; - break; + ret = s->dc_crt_v_sync; + break; case SM501_DC_CRT_HWC_ADDR: - ret = s->dc_crt_hwc_addr; - break; + ret = s->dc_crt_hwc_addr; + break; case SM501_DC_CRT_HWC_LOC: - ret = s->dc_crt_hwc_location; - break; + ret = s->dc_crt_hwc_location; + break; case SM501_DC_CRT_HWC_COLOR_1_2: - ret = s->dc_crt_hwc_color_1_2; - 
break; + ret = s->dc_crt_hwc_color_1_2; + break; case SM501_DC_CRT_HWC_COLOR_3: - ret = s->dc_crt_hwc_color_3; - break; + ret = s->dc_crt_hwc_color_3; + break; - case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400*3 - 4: + case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400 * 3 - 4: ret = sm501_palette_read(opaque, addr - SM501_DC_PANEL_PALETTE); break; default: - printf("sm501 disp ctrl : not implemented register read." - " addr=%x\n", (int)addr); + printf("sm501 disp ctrl : not implemented register read." + " addr=%x\n", (int)addr); abort(); } @@ -965,104 +1011,124 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr, static void sm501_disp_ctrl_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; SM501_DPRINTF("sm501 disp ctrl regs : write addr=%x, val=%x\n", - (unsigned)addr, (unsigned)value); + (unsigned)addr, (unsigned)value); - switch(addr) { + switch (addr) { case SM501_DC_PANEL_CONTROL: - s->dc_panel_control = value & 0x0FFF73FF; - break; + s->dc_panel_control = value & 0x0FFF73FF; + break; case SM501_DC_PANEL_PANNING_CONTROL: - s->dc_panel_panning_control = value & 0xFF3FFF3F; - break; + s->dc_panel_panning_control = value & 0xFF3FFF3F; + break; case SM501_DC_PANEL_FB_ADDR: - s->dc_panel_fb_addr = value & 0x8FFFFFF0; - break; + s->dc_panel_fb_addr = value & 0x8FFFFFF0; + break; case SM501_DC_PANEL_FB_OFFSET: - s->dc_panel_fb_offset = value & 0x3FF03FF0; - break; + s->dc_panel_fb_offset = value & 0x3FF03FF0; + break; case SM501_DC_PANEL_FB_WIDTH: - s->dc_panel_fb_width = value & 0x0FFF0FFF; - break; + s->dc_panel_fb_width = value & 0x0FFF0FFF; + break; case SM501_DC_PANEL_FB_HEIGHT: - s->dc_panel_fb_height = value & 0x0FFF0FFF; - break; + s->dc_panel_fb_height = value & 0x0FFF0FFF; + break; case SM501_DC_PANEL_TL_LOC: - s->dc_panel_tl_location = value & 0x07FF07FF; - break; + s->dc_panel_tl_location = value & 0x07FF07FF; + break; 
case SM501_DC_PANEL_BR_LOC: - s->dc_panel_br_location = value & 0x07FF07FF; - break; + s->dc_panel_br_location = value & 0x07FF07FF; + break; case SM501_DC_PANEL_H_TOT: - s->dc_panel_h_total = value & 0x0FFF0FFF; - break; + s->dc_panel_h_total = value & 0x0FFF0FFF; + break; case SM501_DC_PANEL_H_SYNC: - s->dc_panel_h_sync = value & 0x00FF0FFF; - break; + s->dc_panel_h_sync = value & 0x00FF0FFF; + break; case SM501_DC_PANEL_V_TOT: - s->dc_panel_v_total = value & 0x0FFF0FFF; - break; + s->dc_panel_v_total = value & 0x0FFF0FFF; + break; case SM501_DC_PANEL_V_SYNC: - s->dc_panel_v_sync = value & 0x003F0FFF; - break; + s->dc_panel_v_sync = value & 0x003F0FFF; + break; case SM501_DC_PANEL_HWC_ADDR: - s->dc_panel_hwc_addr = value & 0x8FFFFFF0; - break; + value &= 0x8FFFFFF0; + if (value != s->dc_panel_hwc_addr) { + hwc_invalidate(s, 0); + s->dc_panel_hwc_addr = value; + } + break; case SM501_DC_PANEL_HWC_LOC: - s->dc_panel_hwc_location = value & 0x0FFF0FFF; - break; + value &= 0x0FFF0FFF; + if (value != s->dc_panel_hwc_location) { + hwc_invalidate(s, 0); + s->dc_panel_hwc_location = value; + } + break; case SM501_DC_PANEL_HWC_COLOR_1_2: - s->dc_panel_hwc_color_1_2 = value; - break; + s->dc_panel_hwc_color_1_2 = value; + break; case SM501_DC_PANEL_HWC_COLOR_3: - s->dc_panel_hwc_color_3 = value & 0x0000FFFF; - break; + s->dc_panel_hwc_color_3 = value & 0x0000FFFF; + break; + + case SM501_DC_VIDEO_CONTROL: + s->dc_video_control = value & 0x00037FFF; + break; case SM501_DC_CRT_CONTROL: - s->dc_crt_control = value & 0x0003FFFF; - break; + s->dc_crt_control = value & 0x0003FFFF; + break; case SM501_DC_CRT_FB_ADDR: - s->dc_crt_fb_addr = value & 0x8FFFFFF0; - break; + s->dc_crt_fb_addr = value & 0x8FFFFFF0; + break; case SM501_DC_CRT_FB_OFFSET: - s->dc_crt_fb_offset = value & 0x3FF03FF0; - break; + s->dc_crt_fb_offset = value & 0x3FF03FF0; + break; case SM501_DC_CRT_H_TOT: - s->dc_crt_h_total = value & 0x0FFF0FFF; - break; + s->dc_crt_h_total = value & 0x0FFF0FFF; + break; case 
SM501_DC_CRT_H_SYNC: - s->dc_crt_h_sync = value & 0x00FF0FFF; - break; + s->dc_crt_h_sync = value & 0x00FF0FFF; + break; case SM501_DC_CRT_V_TOT: - s->dc_crt_v_total = value & 0x0FFF0FFF; - break; + s->dc_crt_v_total = value & 0x0FFF0FFF; + break; case SM501_DC_CRT_V_SYNC: - s->dc_crt_v_sync = value & 0x003F0FFF; - break; + s->dc_crt_v_sync = value & 0x003F0FFF; + break; case SM501_DC_CRT_HWC_ADDR: - s->dc_crt_hwc_addr = value & 0x8FFFFFF0; - break; + value &= 0x8FFFFFF0; + if (value != s->dc_crt_hwc_addr) { + hwc_invalidate(s, 1); + s->dc_crt_hwc_addr = value; + } + break; case SM501_DC_CRT_HWC_LOC: - s->dc_crt_hwc_location = value & 0x0FFF0FFF; - break; + value &= 0x0FFF0FFF; + if (value != s->dc_crt_hwc_location) { + hwc_invalidate(s, 1); + s->dc_crt_hwc_location = value; + } + break; case SM501_DC_CRT_HWC_COLOR_1_2: - s->dc_crt_hwc_color_1_2 = value; - break; + s->dc_crt_hwc_color_1_2 = value; + break; case SM501_DC_CRT_HWC_COLOR_3: - s->dc_crt_hwc_color_3 = value & 0x0000FFFF; - break; + s->dc_crt_hwc_color_3 = value & 0x0000FFFF; + break; - case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400*3 - 4: + case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400 * 3 - 4: sm501_palette_write(opaque, addr - SM501_DC_PANEL_PALETTE, value); break; default: - printf("sm501 disp ctrl : not implemented register write." - " addr=%x, val=%x\n", (int)addr, (unsigned)value); + printf("sm501 disp ctrl : not implemented register write." 
+ " addr=%x, val=%x\n", (int)addr, (unsigned)value); abort(); } } @@ -1074,20 +1140,80 @@ static const MemoryRegionOps sm501_disp_ctrl_ops = { .min_access_size = 4, .max_access_size = 4, }, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, }; static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; uint32_t ret = 0; SM501_DPRINTF("sm501 2d engine regs : read addr=%x\n", (int)addr); - switch(addr) { + switch (addr) { + case SM501_2D_SOURCE: + ret = s->twoD_source; + break; + case SM501_2D_DESTINATION: + ret = s->twoD_destination; + break; + case SM501_2D_DIMENSION: + ret = s->twoD_dimension; + break; + case SM501_2D_CONTROL: + ret = s->twoD_control; + break; + case SM501_2D_PITCH: + ret = s->twoD_pitch; + break; + case SM501_2D_FOREGROUND: + ret = s->twoD_foreground; + break; + case SM501_2D_BACKGROUND: + ret = s->twoD_background; + break; + case SM501_2D_STRETCH: + ret = s->twoD_stretch; + break; + case SM501_2D_COLOR_COMPARE: + ret = s->twoD_color_compare; + break; + case SM501_2D_COLOR_COMPARE_MASK: + ret = s->twoD_color_compare_mask; + break; + case SM501_2D_MASK: + ret = s->twoD_mask; + break; + case SM501_2D_CLIP_TL: + ret = s->twoD_clip_tl; + break; + case SM501_2D_CLIP_BR: + ret = s->twoD_clip_br; + break; + case SM501_2D_MONO_PATTERN_LOW: + ret = s->twoD_mono_pattern_low; + break; + case SM501_2D_MONO_PATTERN_HIGH: + ret = s->twoD_mono_pattern_high; + break; + case SM501_2D_WINDOW_WIDTH: + ret = s->twoD_window_width; + break; case SM501_2D_SOURCE_BASE: ret = s->twoD_source_base; break; + case SM501_2D_DESTINATION_BASE: + ret = s->twoD_destination_base; + break; + case SM501_2D_ALPHA: + ret = s->twoD_alpha; + break; + case SM501_2D_WRAP: + ret = s->twoD_wrap; + break; + case SM501_2D_STATUS: + ret = 0; /* Should return interrupt status */ + break; default: printf("sm501 disp ctrl : not implemented register read." 
" addr=%x\n", (int)addr); @@ -1100,11 +1226,11 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr, static void sm501_2d_engine_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - SM501State * s = (SM501State *)opaque; + SM501State *s = (SM501State *)opaque; SM501_DPRINTF("sm501 2d engine regs : write addr=%x, val=%x\n", (unsigned)addr, (unsigned)value); - switch(addr) { + switch (addr) { case SM501_2D_SOURCE: s->twoD_source = value; break; @@ -1130,15 +1256,33 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr, case SM501_2D_FOREGROUND: s->twoD_foreground = value; break; + case SM501_2D_BACKGROUND: + s->twoD_background = value; + break; case SM501_2D_STRETCH: s->twoD_stretch = value; break; + case SM501_2D_COLOR_COMPARE: + s->twoD_color_compare = value; + break; case SM501_2D_COLOR_COMPARE_MASK: s->twoD_color_compare_mask = value; break; case SM501_2D_MASK: s->twoD_mask = value; break; + case SM501_2D_CLIP_TL: + s->twoD_clip_tl = value; + break; + case SM501_2D_CLIP_BR: + s->twoD_clip_br = value; + break; + case SM501_2D_MONO_PATTERN_LOW: + s->twoD_mono_pattern_low = value; + break; + case SM501_2D_MONO_PATTERN_HIGH: + s->twoD_mono_pattern_high = value; + break; case SM501_2D_WINDOW_WIDTH: s->twoD_window_width = value; break; @@ -1148,6 +1292,15 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr, case SM501_2D_DESTINATION_BASE: s->twoD_destination_base = value; break; + case SM501_2D_ALPHA: + s->twoD_alpha = value; + break; + case SM501_2D_WRAP: + s->twoD_wrap = value; + break; + case SM501_2D_STATUS: + /* ignored, writing 0 should clear interrupt status */ + break; default: printf("sm501 2d engine : not implemented register write." 
" addr=%x, val=%x\n", (int)addr, (unsigned)value); @@ -1162,16 +1315,17 @@ static const MemoryRegionOps sm501_2d_engine_ops = { .min_access_size = 4, .max_access_size = 4, }, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, }; /* draw line functions for all console modes */ typedef void draw_line_func(uint8_t *d, const uint8_t *s, - int width, const uint32_t *pal); + int width, const uint32_t *pal); -typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette, - int c_y, uint8_t *d, int width); +typedef void draw_hwc_line_func(uint8_t *d, const uint8_t *s, + int width, const uint8_t *palette, + int c_x, int c_y); #define DEPTH 8 #include "sm501_template.h" @@ -1197,7 +1351,7 @@ typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette, #define DEPTH 32 #include "sm501_template.h" -static draw_line_func * draw_line8_funcs[] = { +static draw_line_func *draw_line8_funcs[] = { draw_line8_8, draw_line8_15, draw_line8_16, @@ -1207,7 +1361,7 @@ static draw_line_func * draw_line8_funcs[] = { draw_line8_16bgr, }; -static draw_line_func * draw_line16_funcs[] = { +static draw_line_func *draw_line16_funcs[] = { draw_line16_8, draw_line16_15, draw_line16_16, @@ -1217,7 +1371,7 @@ static draw_line_func * draw_line16_funcs[] = { draw_line16_16bgr, }; -static draw_line_func * draw_line32_funcs[] = { +static draw_line_func *draw_line32_funcs[] = { draw_line32_8, draw_line32_15, draw_line32_16, @@ -1227,7 +1381,7 @@ static draw_line_func * draw_line32_funcs[] = { draw_line32_16bgr, }; -static draw_hwc_line_func * draw_hwc_line_funcs[] = { +static draw_hwc_line_func *draw_hwc_line_funcs[] = { draw_hwc_line_8, draw_hwc_line_15, draw_hwc_line_16, @@ -1242,7 +1396,7 @@ static inline int get_depth_index(DisplaySurface *surface) switch (surface_bits_per_pixel(surface)) { default: case 8: - return 0; + return 0; case 15: return 1; case 16: @@ -1256,203 +1410,459 @@ static inline int get_depth_index(DisplaySurface *surface) } } 
-static void sm501_draw_crt(SM501State * s) +static void sm501_update_display(void *opaque) { + SM501State *s = (SM501State *)opaque; DisplaySurface *surface = qemu_console_surface(s->con); - int y; - int width = (s->dc_crt_h_total & 0x00000FFF) + 1; - int height = (s->dc_crt_v_total & 0x00000FFF) + 1; - - uint8_t * src = s->local_mem; - int src_bpp = 0; + int y, c_x = 0, c_y = 0; + int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0; + int width = get_width(s, crt); + int height = get_height(s, crt); + int src_bpp = get_bpp(s, crt); int dst_bpp = surface_bytes_per_pixel(surface); - uint32_t * palette = (uint32_t *)&s->dc_palette[SM501_DC_CRT_PALETTE - - SM501_DC_PANEL_PALETTE]; - uint8_t hwc_palette[3 * 3]; - int ds_depth_index = get_depth_index(surface); - draw_line_func * draw_line = NULL; - draw_hwc_line_func * draw_hwc_line = NULL; + int dst_depth_index = get_depth_index(surface); + draw_line_func *draw_line = NULL; + draw_hwc_line_func *draw_hwc_line = NULL; int full_update = 0; int y_start = -1; ram_addr_t page_min = ~0l; ram_addr_t page_max = 0l; - ram_addr_t offset = 0; + ram_addr_t offset; + uint32_t *palette; + uint8_t hwc_palette[3 * 3]; + uint8_t *hwc_src = NULL; + + if (!((crt ? s->dc_crt_control : s->dc_panel_control) + & SM501_DC_CRT_CONTROL_ENABLE)) { + return; + } + + palette = (uint32_t *)(crt ? 
&s->dc_palette[SM501_DC_CRT_PALETTE - + SM501_DC_PANEL_PALETTE] + : &s->dc_palette[0]); /* choose draw_line function */ - switch (s->dc_crt_control & 3) { - case SM501_DC_CRT_CONTROL_8BPP: - src_bpp = 1; - draw_line = draw_line8_funcs[ds_depth_index]; - break; - case SM501_DC_CRT_CONTROL_16BPP: - src_bpp = 2; - draw_line = draw_line16_funcs[ds_depth_index]; - break; - case SM501_DC_CRT_CONTROL_32BPP: - src_bpp = 4; - draw_line = draw_line32_funcs[ds_depth_index]; - break; + switch (src_bpp) { + case 1: + draw_line = draw_line8_funcs[dst_depth_index]; + break; + case 2: + draw_line = draw_line16_funcs[dst_depth_index]; + break; + case 4: + draw_line = draw_line32_funcs[dst_depth_index]; + break; default: - printf("sm501 draw crt : invalid DC_CRT_CONTROL=%x.\n", - s->dc_crt_control); + printf("sm501 update display : invalid control register value.\n"); abort(); - break; + break; } /* set up to draw hardware cursor */ - if (is_hwc_enabled(s, 1)) { - int i; - - /* get cursor palette */ - for (i = 0; i < 3; i++) { - uint16_t rgb565 = get_hwc_color(s, 1, i + 1); - hwc_palette[i * 3 + 0] = (rgb565 & 0xf800) >> 8; /* red */ - hwc_palette[i * 3 + 1] = (rgb565 & 0x07e0) >> 3; /* green */ - hwc_palette[i * 3 + 2] = (rgb565 & 0x001f) << 3; /* blue */ - } - + if (is_hwc_enabled(s, crt)) { /* choose cursor draw line function */ - draw_hwc_line = draw_hwc_line_funcs[ds_depth_index]; + draw_hwc_line = draw_hwc_line_funcs[dst_depth_index]; + hwc_src = get_hwc_address(s, crt); + c_x = get_hwc_x(s, crt); + c_y = get_hwc_y(s, crt); + get_hwc_palette(s, crt, hwc_palette); } /* adjust console size */ if (s->last_width != width || s->last_height != height) { qemu_console_resize(s->con, width, height); surface = qemu_console_surface(s->con); - s->last_width = width; - s->last_height = height; - full_update = 1; + s->last_width = width; + s->last_height = height; + full_update = 1; } /* draw each line according to conditions */ memory_region_sync_dirty_bitmap(&s->local_mem_region); - for 
(y = 0; y < height; y++) { - int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0; - int update = full_update || update_hwc; + for (y = 0, offset = 0; y < height; y++, offset += width * src_bpp) { + int update, update_hwc; ram_addr_t page0 = offset; ram_addr_t page1 = offset + width * src_bpp - 1; - /* check dirty flags for each line */ - update = memory_region_get_dirty(&s->local_mem_region, page0, - page1 - page0, DIRTY_MEMORY_VGA); + /* check if hardware cursor is enabled and we're within its range */ + update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT; + update = full_update || update_hwc; + /* check dirty flags for each line */ + update |= memory_region_get_dirty(&s->local_mem_region, page0, + page1 - page0, DIRTY_MEMORY_VGA); - /* draw line and change status */ - if (update) { + /* draw line and change status */ + if (update) { uint8_t *d = surface_data(surface); d += y * width * dst_bpp; /* draw graphics layer */ - draw_line(d, src, width, palette); + draw_line(d, s->local_mem + offset, width, palette); - /* draw haredware cursor */ + /* draw hardware cursor */ if (update_hwc) { - draw_hwc_line(s, 1, hwc_palette, y - get_hwc_y(s, 1), d, width); + draw_hwc_line(d, hwc_src, width, hwc_palette, c_x, y - c_y); } - if (y_start < 0) - y_start = y; - if (page0 < page_min) - page_min = page0; - if (page1 > page_max) - page_max = page1; - } else { - if (y_start >= 0) { - /* flush to display */ + if (y_start < 0) { + y_start = y; + } + if (page0 < page_min) { + page_min = page0; + } + if (page1 > page_max) { + page_max = page1; + } + } else { + if (y_start >= 0) { + /* flush to display */ dpy_gfx_update(s->con, 0, y_start, width, y - y_start); - y_start = -1; - } - } - - src += width * src_bpp; - offset += width * src_bpp; + y_start = -1; + } + } } /* complete flush to display */ - if (y_start >= 0) + if (y_start >= 0) { dpy_gfx_update(s->con, 0, y_start, width, y - y_start); + } /* clear dirty flags */ if (page_min != ~0l) { - 
memory_region_reset_dirty(&s->local_mem_region, + memory_region_reset_dirty(&s->local_mem_region, page_min, page_max + TARGET_PAGE_SIZE, DIRTY_MEMORY_VGA); } } -static void sm501_update_display(void *opaque) -{ - SM501State * s = (SM501State *)opaque; - - if (s->dc_crt_control & SM501_DC_CRT_CONTROL_ENABLE) - sm501_draw_crt(s); -} - static const GraphicHwOps sm501_ops = { .gfx_update = sm501_update_display, }; -void sm501_init(MemoryRegion *address_space_mem, uint32_t base, - uint32_t local_mem_bytes, qemu_irq irq, Chardev *chr) +static void sm501_reset(SM501State *s) { - SM501State * s; - DeviceState *dev; - MemoryRegion *sm501_system_config = g_new(MemoryRegion, 1); - MemoryRegion *sm501_disp_ctrl = g_new(MemoryRegion, 1); - MemoryRegion *sm501_2d_engine = g_new(MemoryRegion, 1); - - /* allocate management data region */ - s = (SM501State *)g_malloc0(sizeof(SM501State)); - s->base = base; - s->local_mem_size_index - = get_local_mem_size_index(local_mem_bytes); - SM501_DPRINTF("local mem size=%x. index=%d\n", get_local_mem_size(s), - s->local_mem_size_index); - s->system_control = 0x00100000; - s->misc_control = 0x00001000; /* assumes SH, active=low */ - s->dc_panel_control = 0x00010000; + s->system_control = 0x00100000; /* 2D engine FIFO empty */ + /* Bits 17 (SH), 7 (CDR), 6:5 (Test), 2:0 (Bus) are all supposed + * to be determined at reset by GPIO lines which set config bits. 
+ * We hardwire them: + * SH = 0 : Hitachi Ready Polarity == Active Low + * CDR = 0 : do not reset clock divider + * TEST = 0 : Normal mode (not testing the silicon) + * BUS = 0 : Hitachi SH3/SH4 + */ + s->misc_control = SM501_MISC_DAC_POWER; + s->gpio_31_0_control = 0; + s->gpio_63_32_control = 0; + s->dram_control = 0; + s->arbitration_control = 0x05146732; + s->irq_mask = 0; + s->misc_timing = 0; + s->power_mode_control = 0; + s->dc_panel_control = 0x00010000; /* FIFO level 3 */ + s->dc_video_control = 0; s->dc_crt_control = 0x00010000; + s->twoD_source = 0; + s->twoD_destination = 0; + s->twoD_dimension = 0; + s->twoD_control = 0; + s->twoD_pitch = 0; + s->twoD_foreground = 0; + s->twoD_background = 0; + s->twoD_stretch = 0; + s->twoD_color_compare = 0; + s->twoD_color_compare_mask = 0; + s->twoD_mask = 0; + s->twoD_clip_tl = 0; + s->twoD_clip_br = 0; + s->twoD_mono_pattern_low = 0; + s->twoD_mono_pattern_high = 0; + s->twoD_window_width = 0; + s->twoD_source_base = 0; + s->twoD_destination_base = 0; + s->twoD_alpha = 0; + s->twoD_wrap = 0; +} + +static void sm501_init(SM501State *s, DeviceState *dev, + uint32_t local_mem_bytes) +{ + s->local_mem_size_index = get_local_mem_size_index(local_mem_bytes); + SM501_DPRINTF("sm501 local mem size=%x. 
index=%d\n", get_local_mem_size(s), + s->local_mem_size_index); - /* allocate local memory */ - memory_region_init_ram(&s->local_mem_region, NULL, "sm501.local", - local_mem_bytes, &error_fatal); + /* local memory */ + memory_region_init_ram(&s->local_mem_region, OBJECT(dev), "sm501.local", + get_local_mem_size(s), &error_fatal); vmstate_register_ram_global(&s->local_mem_region); memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA); s->local_mem = memory_region_get_ram_ptr(&s->local_mem_region); - memory_region_add_subregion(address_space_mem, base, &s->local_mem_region); - /* map mmio */ - memory_region_init_io(sm501_system_config, NULL, &sm501_system_config_ops, s, + /* mmio */ + memory_region_init(&s->mmio_region, OBJECT(dev), "sm501.mmio", MMIO_SIZE); + memory_region_init_io(&s->system_config_region, OBJECT(dev), + &sm501_system_config_ops, s, "sm501-system-config", 0x6c); - memory_region_add_subregion(address_space_mem, base + MMIO_BASE_OFFSET, - sm501_system_config); - memory_region_init_io(sm501_disp_ctrl, NULL, &sm501_disp_ctrl_ops, s, + memory_region_add_subregion(&s->mmio_region, SM501_SYS_CONFIG, + &s->system_config_region); + memory_region_init_io(&s->disp_ctrl_region, OBJECT(dev), + &sm501_disp_ctrl_ops, s, "sm501-disp-ctrl", 0x1000); - memory_region_add_subregion(address_space_mem, - base + MMIO_BASE_OFFSET + SM501_DC, - sm501_disp_ctrl); - memory_region_init_io(sm501_2d_engine, NULL, &sm501_2d_engine_ops, s, + memory_region_add_subregion(&s->mmio_region, SM501_DC, + &s->disp_ctrl_region); + memory_region_init_io(&s->twoD_engine_region, OBJECT(dev), + &sm501_2d_engine_ops, s, "sm501-2d-engine", 0x54); - memory_region_add_subregion(address_space_mem, - base + MMIO_BASE_OFFSET + SM501_2D_ENGINE, - sm501_2d_engine); + memory_region_add_subregion(&s->mmio_region, SM501_2D_ENGINE, + &s->twoD_engine_region); + + /* create qemu graphic console */ + s->con = graphic_console_init(DEVICE(dev), 0, &sm501_ops, s); +} + +static const 
VMStateDescription vmstate_sm501_state = { + .name = "sm501-state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(local_mem_size_index, SM501State), + VMSTATE_UINT32(system_control, SM501State), + VMSTATE_UINT32(misc_control, SM501State), + VMSTATE_UINT32(gpio_31_0_control, SM501State), + VMSTATE_UINT32(gpio_63_32_control, SM501State), + VMSTATE_UINT32(dram_control, SM501State), + VMSTATE_UINT32(arbitration_control, SM501State), + VMSTATE_UINT32(irq_mask, SM501State), + VMSTATE_UINT32(misc_timing, SM501State), + VMSTATE_UINT32(power_mode_control, SM501State), + VMSTATE_UINT32(uart0_ier, SM501State), + VMSTATE_UINT32(uart0_lcr, SM501State), + VMSTATE_UINT32(uart0_mcr, SM501State), + VMSTATE_UINT32(uart0_scr, SM501State), + VMSTATE_UINT8_ARRAY(dc_palette, SM501State, DC_PALETTE_ENTRIES), + VMSTATE_UINT32(dc_panel_control, SM501State), + VMSTATE_UINT32(dc_panel_panning_control, SM501State), + VMSTATE_UINT32(dc_panel_fb_addr, SM501State), + VMSTATE_UINT32(dc_panel_fb_offset, SM501State), + VMSTATE_UINT32(dc_panel_fb_width, SM501State), + VMSTATE_UINT32(dc_panel_fb_height, SM501State), + VMSTATE_UINT32(dc_panel_tl_location, SM501State), + VMSTATE_UINT32(dc_panel_br_location, SM501State), + VMSTATE_UINT32(dc_panel_h_total, SM501State), + VMSTATE_UINT32(dc_panel_h_sync, SM501State), + VMSTATE_UINT32(dc_panel_v_total, SM501State), + VMSTATE_UINT32(dc_panel_v_sync, SM501State), + VMSTATE_UINT32(dc_panel_hwc_addr, SM501State), + VMSTATE_UINT32(dc_panel_hwc_location, SM501State), + VMSTATE_UINT32(dc_panel_hwc_color_1_2, SM501State), + VMSTATE_UINT32(dc_panel_hwc_color_3, SM501State), + VMSTATE_UINT32(dc_video_control, SM501State), + VMSTATE_UINT32(dc_crt_control, SM501State), + VMSTATE_UINT32(dc_crt_fb_addr, SM501State), + VMSTATE_UINT32(dc_crt_fb_offset, SM501State), + VMSTATE_UINT32(dc_crt_h_total, SM501State), + VMSTATE_UINT32(dc_crt_h_sync, SM501State), + VMSTATE_UINT32(dc_crt_v_total, SM501State), + 
VMSTATE_UINT32(dc_crt_v_sync, SM501State), + VMSTATE_UINT32(dc_crt_hwc_addr, SM501State), + VMSTATE_UINT32(dc_crt_hwc_location, SM501State), + VMSTATE_UINT32(dc_crt_hwc_color_1_2, SM501State), + VMSTATE_UINT32(dc_crt_hwc_color_3, SM501State), + VMSTATE_UINT32(twoD_source, SM501State), + VMSTATE_UINT32(twoD_destination, SM501State), + VMSTATE_UINT32(twoD_dimension, SM501State), + VMSTATE_UINT32(twoD_control, SM501State), + VMSTATE_UINT32(twoD_pitch, SM501State), + VMSTATE_UINT32(twoD_foreground, SM501State), + VMSTATE_UINT32(twoD_background, SM501State), + VMSTATE_UINT32(twoD_stretch, SM501State), + VMSTATE_UINT32(twoD_color_compare, SM501State), + VMSTATE_UINT32(twoD_color_compare_mask, SM501State), + VMSTATE_UINT32(twoD_mask, SM501State), + VMSTATE_UINT32(twoD_clip_tl, SM501State), + VMSTATE_UINT32(twoD_clip_br, SM501State), + VMSTATE_UINT32(twoD_mono_pattern_low, SM501State), + VMSTATE_UINT32(twoD_mono_pattern_high, SM501State), + VMSTATE_UINT32(twoD_window_width, SM501State), + VMSTATE_UINT32(twoD_source_base, SM501State), + VMSTATE_UINT32(twoD_destination_base, SM501State), + VMSTATE_UINT32(twoD_alpha, SM501State), + VMSTATE_UINT32(twoD_wrap, SM501State), + VMSTATE_END_OF_LIST() + } +}; + +#define TYPE_SYSBUS_SM501 "sysbus-sm501" +#define SYSBUS_SM501(obj) \ + OBJECT_CHECK(SM501SysBusState, (obj), TYPE_SYSBUS_SM501) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + SM501State state; + uint32_t vram_size; + uint32_t base; + void *chr_state; +} SM501SysBusState; + +static void sm501_realize_sysbus(DeviceState *dev, Error **errp) +{ + SM501SysBusState *s = SYSBUS_SM501(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + DeviceState *usb_dev; + + sm501_init(&s->state, dev, s->vram_size); + if (get_local_mem_size(&s->state) != s->vram_size) { + error_setg(errp, "Invalid VRAM size, nearest valid size is %" PRIu32, + get_local_mem_size(&s->state)); + return; + } + sysbus_init_mmio(sbd, &s->state.local_mem_region); + 
sysbus_init_mmio(sbd, &s->state.mmio_region); /* bridge to usb host emulation module */ - dev = qdev_create(NULL, "sysbus-ohci"); - qdev_prop_set_uint32(dev, "num-ports", 2); - qdev_prop_set_uint64(dev, "dma-offset", base); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, - base + MMIO_BASE_OFFSET + SM501_USB_HOST); - sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq); + usb_dev = qdev_create(NULL, "sysbus-ohci"); + qdev_prop_set_uint32(usb_dev, "num-ports", 2); + qdev_prop_set_uint64(usb_dev, "dma-offset", s->base); + qdev_init_nofail(usb_dev); + memory_region_add_subregion(&s->state.mmio_region, SM501_USB_HOST, + sysbus_mmio_get_region(SYS_BUS_DEVICE(usb_dev), 0)); + sysbus_pass_irq(sbd, SYS_BUS_DEVICE(usb_dev)); /* bridge to serial emulation module */ - if (chr) { - serial_mm_init(address_space_mem, - base + MMIO_BASE_OFFSET + SM501_UART0, 2, + if (s->chr_state) { + serial_mm_init(&s->state.mmio_region, SM501_UART0, 2, NULL, /* TODO : chain irq to IRL */ - 115200, chr, DEVICE_NATIVE_ENDIAN); + 115200, s->chr_state, DEVICE_LITTLE_ENDIAN); } +} - /* create qemu graphic console */ - s->con = graphic_console_init(DEVICE(dev), 0, &sm501_ops, s); +static Property sm501_sysbus_properties[] = { + DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0), + DEFINE_PROP_UINT32("base", SM501SysBusState, base, 0), + DEFINE_PROP_PTR("chr-state", SM501SysBusState, chr_state), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sm501_reset_sysbus(DeviceState *dev) +{ + SM501SysBusState *s = SYSBUS_SM501(dev); + sm501_reset(&s->state); } + +static const VMStateDescription vmstate_sm501_sysbus = { + .name = TYPE_SYSBUS_SM501, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_STRUCT(state, SM501SysBusState, 1, + vmstate_sm501_state, SM501State), + VMSTATE_END_OF_LIST() + } +}; + +static void sm501_sysbus_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = 
sm501_realize_sysbus; + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); + dc->desc = "SM501 Multimedia Companion"; + dc->props = sm501_sysbus_properties; + dc->reset = sm501_reset_sysbus; + dc->vmsd = &vmstate_sm501_sysbus; + /* Note: pointer property "chr-state" may remain null, thus + * no need for dc->cannot_instantiate_with_device_add_yet = true; + */ +} + +static const TypeInfo sm501_sysbus_info = { + .name = TYPE_SYSBUS_SM501, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SM501SysBusState), + .class_init = sm501_sysbus_class_init, +}; + +#define TYPE_PCI_SM501 "sm501" +#define PCI_SM501(obj) OBJECT_CHECK(SM501PCIState, (obj), TYPE_PCI_SM501) + +typedef struct { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + SM501State state; + uint32_t vram_size; +} SM501PCIState; + +static void sm501_realize_pci(PCIDevice *dev, Error **errp) +{ + SM501PCIState *s = PCI_SM501(dev); + + sm501_init(&s->state, DEVICE(dev), s->vram_size); + if (get_local_mem_size(&s->state) != s->vram_size) { + error_setg(errp, "Invalid VRAM size, nearest valid size is %" PRIu32, + get_local_mem_size(&s->state)); + return; + } + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, + &s->state.local_mem_region); + pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, + &s->state.mmio_region); +} + +static Property sm501_pci_properties[] = { + DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * M_BYTE), + DEFINE_PROP_END_OF_LIST(), +}; + +static void sm501_reset_pci(DeviceState *dev) +{ + SM501PCIState *s = PCI_SM501(dev); + sm501_reset(&s->state); + /* Bits 2:0 of misc_control register is 001 for PCI */ + s->state.misc_control |= 1; +} + +static const VMStateDescription vmstate_sm501_pci = { + .name = TYPE_PCI_SM501, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, SM501PCIState), + VMSTATE_STRUCT(state, SM501PCIState, 1, + vmstate_sm501_state, SM501State), + VMSTATE_END_OF_LIST() + } +}; + 
+static void sm501_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = sm501_realize_pci; + k->vendor_id = PCI_VENDOR_ID_SILICON_MOTION; + k->device_id = PCI_DEVICE_ID_SM501; + k->class_id = PCI_CLASS_DISPLAY_OTHER; + set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); + dc->desc = "SM501 Display Controller"; + dc->props = sm501_pci_properties; + dc->reset = sm501_reset_pci; + dc->hotpluggable = false; + dc->vmsd = &vmstate_sm501_pci; +} + +static const TypeInfo sm501_pci_info = { + .name = TYPE_PCI_SM501, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(SM501PCIState), + .class_init = sm501_pci_class_init, +}; + +static void sm501_register_types(void) +{ + type_register_static(&sm501_sysbus_info); + type_register_static(&sm501_pci_info); +} + +type_init(sm501_register_types) diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h index f33e499be4..a60abad019 100644 --- a/hw/display/sm501_template.h +++ b/hw/display/sm501_template.h @@ -47,81 +47,67 @@ static void glue(draw_line8_, PIXEL_NAME)( { uint8_t v, r, g, b; do { - v = ldub_p(s); - r = (pal[v] >> 16) & 0xff; - g = (pal[v] >> 8) & 0xff; - b = (pal[v] >> 0) & 0xff; - ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); - s ++; - d += BPP; - } while (-- width != 0); + v = ldub_p(s); + r = (pal[v] >> 16) & 0xff; + g = (pal[v] >> 8) & 0xff; + b = (pal[v] >> 0) & 0xff; + *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); + s++; + d += BPP; + } while (--width != 0); } static void glue(draw_line16_, PIXEL_NAME)( - uint8_t *d, const uint8_t *s, int width, const uint32_t *pal) + uint8_t *d, const uint8_t *s, int width, const uint32_t *pal) { uint16_t rgb565; uint8_t r, g, b; do { - rgb565 = lduw_p(s); - r = ((rgb565 >> 11) & 0x1f) << 3; - g = ((rgb565 >> 5) & 0x3f) << 2; - b = ((rgb565 >> 0) & 0x1f) << 3; - ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); - s 
+= 2; - d += BPP; - } while (-- width != 0); + rgb565 = lduw_le_p(s); + r = (rgb565 >> 8) & 0xf8; + g = (rgb565 >> 3) & 0xfc; + b = (rgb565 << 3) & 0xf8; + *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); + s += 2; + d += BPP; + } while (--width != 0); } static void glue(draw_line32_, PIXEL_NAME)( - uint8_t *d, const uint8_t *s, int width, const uint32_t *pal) + uint8_t *d, const uint8_t *s, int width, const uint32_t *pal) { uint8_t r, g, b; do { - ldub_p(s); -#if defined(TARGET_WORDS_BIGENDIAN) - r = s[1]; - g = s[2]; - b = s[3]; -#else - b = s[0]; - g = s[1]; r = s[2]; -#endif - ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); - s += 4; - d += BPP; - } while (-- width != 0); + g = s[1]; + b = s[0]; + *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); + s += 4; + d += BPP; + } while (--width != 0); } /** * Draw hardware cursor image on the given line. */ -static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt, - uint8_t * palette, int c_y, uint8_t *d, int width) +static void glue(draw_hwc_line_, PIXEL_NAME)(uint8_t *d, const uint8_t *s, + int width, const uint8_t *palette, int c_x, int c_y) { - int x, i; - uint8_t bitset = 0; - - /* get hardware cursor pattern */ - uint32_t cursor_addr = get_hwc_address(s, crt); - assert(0 <= c_y && c_y < SM501_HWC_HEIGHT); - cursor_addr += 64 * c_y / 4; /* 4 pixels per byte */ - cursor_addr += s->base; + int i; + uint8_t r, g, b, v, bitset = 0; /* get cursor position */ - x = get_hwc_x(s, crt); - d += x * BPP; - - for (i = 0; i < SM501_HWC_WIDTH && x + i < width; i++) { - uint8_t v; + assert(0 <= c_y && c_y < SM501_HWC_HEIGHT); + s += SM501_HWC_WIDTH * c_y / 4; /* 4 pixels per byte */ + d += c_x * BPP; + for (i = 0; i < SM501_HWC_WIDTH && c_x + i < width; i++) { /* get pixel value */ if (i % 4 == 0) { - bitset = ldub_phys(&address_space_memory, cursor_addr); - cursor_addr++; + bitset = ldub_p(s); + s++; } v = bitset & 3; bitset >>= 2; @@ -129,10 +115,10 @@ static void 
glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt, /* write pixel */ if (v) { v--; - uint8_t r = palette[v * 3 + 0]; - uint8_t g = palette[v * 3 + 1]; - uint8_t b = palette[v * 3 + 2]; - ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); + r = palette[v * 3 + 0]; + g = palette[v * 3 + 1]; + b = palette[v * 3 + 2]; + *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b); } d += BPP; } diff --git a/hw/display/tcx.c b/hw/display/tcx.c index 8e26aae801..5a1115cc65 100644 --- a/hw/display/tcx.c +++ b/hw/display/tcx.c @@ -25,7 +25,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" -#include "cpu.h" /* FIXME shouldn't use TARGET_PAGE_SIZE */ #include "ui/console.h" #include "ui/pixel_ops.h" #include "hw/loader.h" @@ -93,41 +92,46 @@ typedef struct TCXState { uint16_t cursy; } TCXState; -static void tcx_set_dirty(TCXState *s) +static void tcx_set_dirty(TCXState *s, ram_addr_t addr, int len) { - memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY); + memory_region_set_dirty(&s->vram_mem, addr, len); + + if (s->depth == 24) { + memory_region_set_dirty(&s->vram_mem, s->vram24_offset + addr * 4, + len * 4); + memory_region_set_dirty(&s->vram_mem, s->cplane_offset + addr * 4, + len * 4); + } } -static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page, - ram_addr_t page24, ram_addr_t cpage) +static int tcx_check_dirty(TCXState *s, ram_addr_t addr, int len) { int ret; - ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); - ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4, - DIRTY_MEMORY_VGA); - ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4, - DIRTY_MEMORY_VGA); + ret = memory_region_get_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA); + + if (s->depth == 24) { + ret |= memory_region_get_dirty(&s->vram_mem, + s->vram24_offset + addr * 4, len * 4, + DIRTY_MEMORY_VGA); + ret |= memory_region_get_dirty(&s->vram_mem, + s->cplane_offset 
+ addr * 4, len * 4, + DIRTY_MEMORY_VGA); + } + return ret; } -static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min, - ram_addr_t page_max, ram_addr_t page24, - ram_addr_t cpage) +static void tcx_reset_dirty(TCXState *s, ram_addr_t addr, int len) { - memory_region_reset_dirty(&ts->vram_mem, - page_min, - (page_max - page_min) + TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); - memory_region_reset_dirty(&ts->vram_mem, - page24 + page_min * 4, - (page_max - page_min) * 4 + TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); - memory_region_reset_dirty(&ts->vram_mem, - cpage + page_min * 4, - (page_max - page_min) * 4 + TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); + memory_region_reset_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA); + + if (s->depth == 24) { + memory_region_reset_dirty(&s->vram_mem, s->vram24_offset + addr * 4, + len * 4, DIRTY_MEMORY_VGA); + memory_region_reset_dirty(&s->vram_mem, s->cplane_offset + addr * 4, + len * 4, DIRTY_MEMORY_VGA); + } } static void update_palette_entries(TCXState *s, int start, int end) @@ -136,27 +140,14 @@ static void update_palette_entries(TCXState *s, int start, int end) int i; for (i = start; i < end; i++) { - switch (surface_bits_per_pixel(surface)) { - default: - case 8: - s->palette[i] = rgb_to_pixel8(s->r[i], s->g[i], s->b[i]); - break; - case 15: - s->palette[i] = rgb_to_pixel15(s->r[i], s->g[i], s->b[i]); - break; - case 16: - s->palette[i] = rgb_to_pixel16(s->r[i], s->g[i], s->b[i]); - break; - case 32: - if (is_surface_bgr(surface)) { - s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]); - } else { - s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]); - } - break; + if (is_surface_bgr(surface)) { + s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]); + } else { + s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]); } + break; } - tcx_set_dirty(s); + tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem)); } static void tcx_draw_line32(TCXState *s1, uint8_t *d, @@ -172,31 +163,6 @@ static 
void tcx_draw_line32(TCXState *s1, uint8_t *d, } } -static void tcx_draw_line16(TCXState *s1, uint8_t *d, - const uint8_t *s, int width) -{ - int x; - uint8_t val; - uint16_t *p = (uint16_t *)d; - - for (x = 0; x < width; x++) { - val = *s++; - *p++ = s1->palette[val]; - } -} - -static void tcx_draw_line8(TCXState *s1, uint8_t *d, - const uint8_t *s, int width) -{ - int x; - uint8_t val; - - for(x = 0; x < width; x++) { - val = *s++; - *d++ = s1->palette[val]; - } -} - static void tcx_draw_cursor32(TCXState *s1, uint8_t *d, int y, int width) { @@ -223,57 +189,6 @@ static void tcx_draw_cursor32(TCXState *s1, uint8_t *d, } } -static void tcx_draw_cursor16(TCXState *s1, uint8_t *d, - int y, int width) -{ - int x, len; - uint32_t mask, bits; - uint16_t *p = (uint16_t *)d; - - y = y - s1->cursy; - mask = s1->cursmask[y]; - bits = s1->cursbits[y]; - len = MIN(width - s1->cursx, 32); - p = &p[s1->cursx]; - for (x = 0; x < len; x++) { - if (mask & 0x80000000) { - if (bits & 0x80000000) { - *p = s1->palette[259]; - } else { - *p = s1->palette[258]; - } - } - p++; - mask <<= 1; - bits <<= 1; - } -} - -static void tcx_draw_cursor8(TCXState *s1, uint8_t *d, - int y, int width) -{ - int x, len; - uint32_t mask, bits; - - y = y - s1->cursy; - mask = s1->cursmask[y]; - bits = s1->cursbits[y]; - len = MIN(width - s1->cursx, 32); - d = &d[s1->cursx]; - for (x = 0; x < len; x++) { - if (mask & 0x80000000) { - if (bits & 0x80000000) { - *d = s1->palette[259]; - } else { - *d = s1->palette[258]; - } - } - d++; - mask <<= 1; - bits <<= 1; - } -} - /* XXX Could be much more optimal: * detect if line/page/whole screen is in 24 bit mode @@ -322,10 +237,8 @@ static void tcx_update_display(void *opaque) ram_addr_t page, page_min, page_max; int y, y_start, dd, ds; uint8_t *d, *s; - void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width); - void (*fc)(TCXState *s1, uint8_t *dst, int y, int width); - if (surface_bits_per_pixel(surface) == 0) { + if (surface_bits_per_pixel(surface) 
!= 32) { return; } @@ -338,29 +251,9 @@ static void tcx_update_display(void *opaque) dd = surface_stride(surface); ds = 1024; - switch (surface_bits_per_pixel(surface)) { - case 32: - f = tcx_draw_line32; - fc = tcx_draw_cursor32; - break; - case 15: - case 16: - f = tcx_draw_line16; - fc = tcx_draw_cursor16; - break; - default: - case 8: - f = tcx_draw_line8; - fc = tcx_draw_cursor8; - break; - case 0: - return; - } - memory_region_sync_dirty_bitmap(&ts->vram_mem); - for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) { - if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA)) { + for (y = 0; y < ts->height; y++, page += ds) { + if (tcx_check_dirty(ts, page, ds)) { if (y_start < 0) y_start = y; if (page < page_min) @@ -368,37 +261,10 @@ static void tcx_update_display(void *opaque) if (page > page_max) page_max = page; - f(ts, d, s, ts->width); - if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) { - fc(ts, d, y, ts->width); - } - d += dd; - s += ds; - y++; - - f(ts, d, s, ts->width); - if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) { - fc(ts, d, y, ts->width); - } - d += dd; - s += ds; - y++; - - f(ts, d, s, ts->width); - if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) { - fc(ts, d, y, ts->width); - } - d += dd; - s += ds; - y++; - - f(ts, d, s, ts->width); + tcx_draw_line32(ts, d, s, ts->width); if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) { - fc(ts, d, y, ts->width); + tcx_draw_cursor32(ts, d, y, ts->width); } - d += dd; - s += ds; - y++; } else { if (y_start >= 0) { /* flush to display */ @@ -406,10 +272,9 @@ static void tcx_update_display(void *opaque) ts->width, y - y_start); y_start = -1; } - d += dd * 4; - s += ds * 4; - y += 4; } + s += ds; + d += dd; } if (y_start >= 0) { /* flush to display */ @@ -418,10 +283,7 @@ static void tcx_update_display(void *opaque) } /* reset modified pages */ if (page_max >= page_min) { - 
memory_region_reset_dirty(&ts->vram_mem, - page_min, - (page_max - page_min) + TARGET_PAGE_SIZE, - DIRTY_MEMORY_VGA); + tcx_reset_dirty(ts, page_min, page_max - page_min); } } @@ -429,7 +291,7 @@ static void tcx24_update_display(void *opaque) { TCXState *ts = opaque; DisplaySurface *surface = qemu_console_surface(ts->con); - ram_addr_t page, page_min, page_max, cpage, page24; + ram_addr_t page, page_min, page_max; int y, y_start, dd, ds; uint8_t *d, *s; uint32_t *cptr, *s24; @@ -439,8 +301,6 @@ static void tcx24_update_display(void *opaque) } page = 0; - page24 = ts->vram24_offset; - cpage = ts->cplane_offset; y_start = -1; page_min = -1; page_max = 0; @@ -452,9 +312,8 @@ static void tcx24_update_display(void *opaque) ds = 1024; memory_region_sync_dirty_bitmap(&ts->vram_mem); - for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE, - page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) { - if (tcx24_check_dirty(ts, page, page24, cpage)) { + for (y = 0; y < ts->height; y++, page += ds) { + if (tcx_check_dirty(ts, page, ds)) { if (y_start < 0) y_start = y; if (page < page_min) @@ -465,38 +324,6 @@ static void tcx24_update_display(void *opaque) if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) { tcx_draw_cursor32(ts, d, y, ts->width); } - d += dd; - s += ds; - cptr += ds; - s24 += ds; - y++; - tcx24_draw_line32(ts, d, s, ts->width, cptr, s24); - if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) { - tcx_draw_cursor32(ts, d, y, ts->width); - } - d += dd; - s += ds; - cptr += ds; - s24 += ds; - y++; - tcx24_draw_line32(ts, d, s, ts->width, cptr, s24); - if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) { - tcx_draw_cursor32(ts, d, y, ts->width); - } - d += dd; - s += ds; - cptr += ds; - s24 += ds; - y++; - tcx24_draw_line32(ts, d, s, ts->width, cptr, s24); - if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) { - tcx_draw_cursor32(ts, d, y, ts->width); - } - d += dd; - s += ds; - cptr += ds; - s24 += ds; - y++; } 
else { if (y_start >= 0) { /* flush to display */ @@ -504,12 +331,11 @@ static void tcx24_update_display(void *opaque) ts->width, y - y_start); y_start = -1; } - d += dd * 4; - s += ds * 4; - cptr += ds * 4; - s24 += ds * 4; - y += 4; } + d += dd; + s += ds; + cptr += ds; + s24 += ds; } if (y_start >= 0) { /* flush to display */ @@ -518,7 +344,7 @@ static void tcx24_update_display(void *opaque) } /* reset modified pages */ if (page_max >= page_min) { - tcx24_reset_dirty(ts, page_min, page_max, page24, cpage); + tcx_reset_dirty(ts, page_min, page_max - page_min); } } @@ -526,7 +352,7 @@ static void tcx_invalidate_display(void *opaque) { TCXState *s = opaque; - tcx_set_dirty(s); + tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem)); qemu_console_resize(s->con, s->width, s->height); } @@ -534,7 +360,7 @@ static void tcx24_invalidate_display(void *opaque) { TCXState *s = opaque; - tcx_set_dirty(s); + tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem)); qemu_console_resize(s->con, s->width, s->height); } @@ -543,7 +369,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id) TCXState *s = opaque; update_palette_entries(s, 0, 256); - tcx_set_dirty(s); + tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem)); return 0; } @@ -699,7 +525,7 @@ static void tcx_stip_writel(void *opaque, hwaddr addr, val <<= 1; } } - memory_region_set_dirty(&s->vram_mem, addr, 32); + tcx_set_dirty(s, addr, 32); } } @@ -732,7 +558,7 @@ static void tcx_rstip_writel(void *opaque, hwaddr addr, val <<= 1; } } - memory_region_set_dirty(&s->vram_mem, addr, 32); + tcx_set_dirty(s, addr, 32); } } @@ -790,7 +616,7 @@ static void tcx_blit_writel(void *opaque, hwaddr addr, memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4); } } - memory_region_set_dirty(&s->vram_mem, addr, len); + tcx_set_dirty(s, addr, len); } } @@ -824,7 +650,7 @@ static void tcx_rblit_writel(void *opaque, hwaddr addr, memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4); } } - memory_region_set_dirty(&s->vram_mem, addr, 
len); + tcx_set_dirty(s, addr, len); } } @@ -861,7 +687,7 @@ static void tcx_invalidate_cursor_position(TCXState *s) start = ymin * 1024; end = ymax * 1024; - memory_region_set_dirty(&s->vram_mem, start, end-start); + tcx_set_dirty(s, start, end - start); } static uint64_t tcx_thc_readl(void *opaque, hwaddr addr, @@ -1017,8 +843,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp) vmstate_register_ram_global(&s->rom); fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, TCX_ROM_FILE); if (fcode_filename) { - ret = load_image_targphys(fcode_filename, s->prom_addr, - FCODE_MAX_ROM_SIZE); + ret = load_image_mr(fcode_filename, &s->rom); g_free(fcode_filename); if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) { error_report("tcx: could not load prom '%s'", TCX_ROM_FILE); @@ -1076,7 +901,6 @@ static Property tcx_properties[] = { DEFINE_PROP_UINT16("width", TCXState, width, -1), DEFINE_PROP_UINT16("height", TCXState, height, -1), DEFINE_PROP_UINT16("depth", TCXState, depth, -1), - DEFINE_PROP_UINT64("prom_addr", TCXState, prom_addr, -1), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/vga.c b/hw/display/vga.c index 69c3e1d674..b2516c8d21 100644 --- a/hw/display/vga.c +++ b/hw/display/vga.c @@ -1434,6 +1434,14 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2) } } +static bool vga_scanline_invalidated(VGACommonState *s, int y) +{ + if (y >= VGA_MAX_HEIGHT) { + return false; + } + return s->invalidated_y_table[y >> 5] & (1 << (y & 0x1f)); +} + void vga_sync_dirty_bitmap(VGACommonState *s) { memory_region_sync_dirty_bitmap(&s->vram); @@ -1457,7 +1465,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) DisplaySurface *surface = qemu_console_surface(s->con); int y1, y, update, linesize, y_start, double_scan, mask, depth; int width, height, shift_control, line_offset, bwidth, bits; - ram_addr_t page0, page1, page_min, page_max; + ram_addr_t page0, page1; + DirtyBitmapSnapshot *snap = NULL; int disp_width, multi_scan, multi_run; uint8_t 
*d; uint32_t v, addr1, addr; @@ -1472,9 +1481,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) full_update |= update_basic_params(s); - if (!full_update) - vga_sync_dirty_bitmap(s); - s->get_resolution(s, &width, &height); disp_width = width; @@ -1617,11 +1623,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) addr1 = (s->start_addr * 4); bwidth = (width * bits + 7) / 8; y_start = -1; - page_min = -1; - page_max = 0; d = surface_data(surface); linesize = surface_stride(surface); y1 = 0; + + if (!full_update) { + vga_sync_dirty_bitmap(s); + snap = memory_region_snapshot_and_clear_dirty(&s->vram, addr1, + bwidth * height, + DIRTY_MEMORY_VGA); + } + for(y = 0; y < height; y++) { addr = addr1; if (!(s->cr[VGA_CRTC_MODE] & 1)) { @@ -1636,17 +1648,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) update = full_update; page0 = addr; page1 = addr + bwidth - 1; - update |= memory_region_get_dirty(&s->vram, page0, page1 - page0, - DIRTY_MEMORY_VGA); - /* explicit invalidation for the hardware cursor */ - update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1; + if (full_update) { + update = 1; + } else { + update = memory_region_snapshot_get_dirty(&s->vram, snap, + page0, page1 - page0); + } + /* explicit invalidation for the hardware cursor (cirrus only) */ + update |= vga_scanline_invalidated(s, y); if (update) { if (y_start < 0) y_start = y; - if (page0 < page_min) - page_min = page0; - if (page1 > page_max) - page_max = page1; if (!(is_buffer_shared(surface))) { vga_draw_line(s, d, s->vram_ptr + addr, width); if (s->cursor_draw_line) @@ -1679,14 +1691,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update) dpy_gfx_update(s->con, 0, y_start, disp_width, y - y_start); } - /* reset modified pages */ - if (page_max >= page_min) { - memory_region_reset_dirty(&s->vram, - page_min, - page_max - page_min, - DIRTY_MEMORY_VGA); - } - memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4); + 
g_free(snap); + memset(s->invalidated_y_table, 0, sizeof(s->invalidated_y_table)); } static void vga_draw_blank(VGACommonState *s, int full_update) diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 9b530ab5b0..e1056f34df 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -258,41 +258,22 @@ void virtio_gpu_get_display_info(VirtIOGPU *g, static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format) { switch (virtio_gpu_format) { -#ifdef HOST_WORDS_BIGENDIAN case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: - return PIXMAN_b8g8r8x8; + return PIXMAN_BE_b8g8r8x8; case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: - return PIXMAN_b8g8r8a8; + return PIXMAN_BE_b8g8r8a8; case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: - return PIXMAN_x8r8g8b8; + return PIXMAN_BE_x8r8g8b8; case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: - return PIXMAN_a8r8g8b8; + return PIXMAN_BE_a8r8g8b8; case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: - return PIXMAN_r8g8b8x8; + return PIXMAN_BE_r8g8b8x8; case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: - return PIXMAN_r8g8b8a8; + return PIXMAN_BE_r8g8b8a8; case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: - return PIXMAN_x8b8g8r8; + return PIXMAN_BE_x8b8g8r8; case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: - return PIXMAN_a8b8g8r8; -#else - case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM: - return PIXMAN_x8r8g8b8; - case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM: - return PIXMAN_a8r8g8b8; - case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM: - return PIXMAN_b8g8r8x8; - case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM: - return PIXMAN_b8g8r8a8; - case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM: - return PIXMAN_x8b8g8r8; - case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM: - return PIXMAN_a8b8g8r8; - case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM: - return PIXMAN_r8g8b8x8; - case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM: - return PIXMAN_r8g8b8a8; -#endif + return PIXMAN_BE_a8b8g8r8; default: return 0; } @@ -1170,8 +1151,8 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU, 
g->config_size); - g->req_state[0].width = 1024; - g->req_state[0].height = 768; + g->req_state[0].width = g->conf.xres; + g->req_state[0].height = g->conf.yres; if (virtio_gpu_virgl_enabled(g->conf)) { /* use larger control queue in 3d mode */ @@ -1291,6 +1272,8 @@ static Property virtio_gpu_properties[] = { DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags, VIRTIO_GPU_FLAG_STATS_ENABLED, false), #endif + DEFINE_PROP_UINT32("xres", VirtIOGPU, conf.xres, 1024), + DEFINE_PROP_UINT32("yres", VirtIOGPU, conf.yres, 768), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index 6599cf078d..ec5f27d67e 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -1118,9 +1118,9 @@ static void vmsvga_update_display(void *opaque) { struct vmsvga_state_s *s = opaque; DisplaySurface *surface; - bool dirty = false; - if (!s->enable) { + if (!s->enable || !s->config) { + /* in standard vga mode */ s->vga.hw_ops->gfx_update(&s->vga); return; } @@ -1131,26 +1131,11 @@ static void vmsvga_update_display(void *opaque) vmsvga_fifo_run(s); vmsvga_update_rect_flush(s); - /* - * Is it more efficient to look at vram VGA-dirty bits or wait - * for the driver to issue SVGA_CMD_UPDATE? 
- */ - if (memory_region_is_logging(&s->vga.vram, DIRTY_MEMORY_VGA)) { - vga_sync_dirty_bitmap(&s->vga); - dirty = memory_region_get_dirty(&s->vga.vram, 0, - surface_stride(surface) * surface_height(surface), - DIRTY_MEMORY_VGA); - } - if (s->invalidated || dirty) { + if (s->invalidated) { s->invalidated = 0; dpy_gfx_update(s->vga.con, 0, 0, surface_width(surface), surface_height(surface)); } - if (dirty) { - memory_region_reset_dirty(&s->vga.vram, 0, - surface_stride(surface) * surface_height(surface), - DIRTY_MEMORY_VGA); - } } static void vmsvga_reset(DeviceState *dev) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index e0732ccaf1..f86a40aa30 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -572,8 +572,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size) uint64_t val = -1; if (addr + size > AMDVI_MMIO_SIZE) { - trace_amdvi_mmio_read("error: addr outside region: max ", - (uint64_t)AMDVI_MMIO_SIZE, addr, size); + trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size); return (uint64_t)-1; } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 22d8226e43..02f047c8e3 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -595,6 +595,22 @@ static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce) return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9; } +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) +{ + uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); + return 1ULL << MIN(ce_agaw, VTD_MGAW); +} + +/* Return true if IOVA passes range check, otherwise false. */ +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) +{ + /* + * Check if @iova is above 2^X-1, where X is the minimum of MGAW + * in CAP_REG and AW in context-entry. 
+ */ + return !(iova & ~(vtd_iova_limit(ce) - 1)); +} + static const uint64_t vtd_paging_entry_rsvd_field[] = { [0] = ~0ULL, /* For not large page */ @@ -630,13 +646,9 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint32_t level = vtd_get_level_from_context_entry(ce); uint32_t offset; uint64_t slpte; - uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce); uint64_t access_right_check; - /* Check if @iova is above 2^X-1, where X is the minimum of MGAW - * in CAP_REG and AW in context-entry. - */ - if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) { + if (!vtd_iova_range_check(iova, ce)) { VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -684,6 +696,135 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, } } +typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); + +/** + * vtd_page_walk_level - walk over specific level for IOVA range + * + * @addr: base GPA addr to start the walk + * @start: IOVA range start address + * @end: IOVA range end address (start <= addr < end) + * @hook_fn: hook func to be called when detected page + * @private: private data to be passed into hook func + * @read: whether parent level has read permission + * @write: whether parent level has write permission + * @notify_unmap: whether we should notify invalid entries + */ +static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, + uint64_t end, vtd_page_walk_hook hook_fn, + void *private, uint32_t level, + bool read, bool write, bool notify_unmap) +{ + bool read_cur, write_cur, entry_valid; + uint32_t offset; + uint64_t slpte; + uint64_t subpage_size, subpage_mask; + IOMMUTLBEntry entry; + uint64_t iova = start; + uint64_t iova_next; + int ret = 0; + + trace_vtd_page_walk_level(addr, level, start, end); + + subpage_size = 1ULL << vtd_slpt_level_shift(level); + subpage_mask = vtd_slpt_level_page_mask(level); + + while (iova < end) { + 
iova_next = (iova & subpage_mask) + subpage_size; + + offset = vtd_iova_level_offset(iova, level); + slpte = vtd_get_slpte(addr, offset); + + if (slpte == (uint64_t)-1) { + trace_vtd_page_walk_skip_read(iova, iova_next); + goto next; + } + + if (vtd_slpte_nonzero_rsvd(slpte, level)) { + trace_vtd_page_walk_skip_reserve(iova, iova_next); + goto next; + } + + /* Permissions are stacked with parents' */ + read_cur = read && (slpte & VTD_SL_R); + write_cur = write && (slpte & VTD_SL_W); + + /* + * As long as we have either read/write permission, this is a + * valid entry. The rule works for both page entries and page + * table entries. + */ + entry_valid = read_cur | write_cur; + + if (vtd_is_last_slpte(slpte, level)) { + entry.target_as = &address_space_memory; + entry.iova = iova & subpage_mask; + /* NOTE: this is only meaningful if entry_valid == true */ + entry.translated_addr = vtd_get_slpte_addr(slpte); + entry.addr_mask = ~subpage_mask; + entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); + if (!entry_valid && !notify_unmap) { + trace_vtd_page_walk_skip_perm(iova, iova_next); + goto next; + } + trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr, + entry.addr_mask, entry.perm); + if (hook_fn) { + ret = hook_fn(&entry, private); + if (ret < 0) { + return ret; + } + } + } else { + if (!entry_valid) { + trace_vtd_page_walk_skip_perm(iova, iova_next); + goto next; + } + ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, + MIN(iova_next, end), hook_fn, private, + level - 1, read_cur, write_cur, + notify_unmap); + if (ret < 0) { + return ret; + } + } + +next: + iova = iova_next; + } + + return 0; +} + +/** + * vtd_page_walk - walk specific IOVA range, and call the hook + * + * @ce: context entry to walk upon + * @start: IOVA address to start the walk + * @end: IOVA range end address (start <= addr < end) + * @hook_fn: the hook that to be called for each detected area + * @private: private data for the hook function + */ +static int 
vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, + vtd_page_walk_hook hook_fn, void *private, + bool notify_unmap) +{ + dma_addr_t addr = vtd_get_slpt_base_from_context(ce); + uint32_t level = vtd_get_level_from_context_entry(ce); + + if (!vtd_iova_range_check(start, ce)) { + return -VTD_FR_ADDR_BEYOND_MGAW; + } + + if (!vtd_iova_range_check(end, ce)) { + /* Fix end so that it reaches the maximum */ + end = vtd_iova_limit(ce); + } + + return vtd_page_walk_level(addr, start, end, hook_fn, private, + level, true, true, notify_unmap); +} + /* Map a device to its corresponding domain (context-entry) */ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn, VTDContextEntry *ce) @@ -898,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) s->intr_root, s->intr_size); } +static void vtd_iommu_replay_all(IntelIOMMUState *s) +{ + IntelIOMMUNotifierNode *node; + + QLIST_FOREACH(node, &s->notifiers_list, next) { + memory_region_iommu_replay_all(&node->vtd_as->iommu); + } +} + static void vtd_context_global_invalidate(IntelIOMMUState *s) { trace_vtd_inv_desc_cc_global(); @@ -905,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s) if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) { vtd_reset_context_cache(s); } + /* + * From VT-d spec 6.5.2.1, a global context entry invalidation + * should be followed by a IOTLB global invalidation, so we should + * be safe even without this. Hoewever, let's replay the region as + * well to be safer, and go back here when we need finer tunes for + * VT-d emulation codes. 
+ */ + vtd_iommu_replay_all(s); } @@ -971,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), VTD_PCI_FUNC(devfn_it)); vtd_as->context_cache_entry.context_cache_gen = 0; + /* + * So a device is moving out of (or moving into) a + * domain, a replay() suites here to notify all the + * IOMMU_NOTIFIER_MAP registers about this change. + * This won't bring bad even if we have no such + * notifier registered - the IOMMU notification + * framework will skip MAP notifications if that + * happened. + */ + memory_region_iommu_replay_all(&vtd_as->iommu); } } } @@ -1012,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s) { trace_vtd_iotlb_reset("global invalidation recved"); vtd_reset_iotlb(s); + vtd_iommu_replay_all(s); } static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) { + IntelIOMMUNotifierNode *node; + VTDContextEntry ce; + VTDAddressSpace *vtd_as; + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain, &domain_id); + + QLIST_FOREACH(node, &s->notifiers_list, next) { + vtd_as = node->vtd_as; + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce) && + domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { + memory_region_iommu_replay_all(&vtd_as->iommu); + } + } +} + +static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry, + void *private) +{ + memory_region_notify_iommu((MemoryRegion *)private, *entry); + return 0; +} + +static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, + uint16_t domain_id, hwaddr addr, + uint8_t am) +{ + IntelIOMMUNotifierNode *node; + VTDContextEntry ce; + int ret; + + QLIST_FOREACH(node, &(s->notifiers_list), next) { + VTDAddressSpace *vtd_as = node->vtd_as; + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce); + if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { + vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, + 
vtd_page_invalidate_notify_hook, + (void *)&vtd_as->iommu, true); + } + } } static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, @@ -1030,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, info.addr = addr; info.mask = ~((1 << am) - 1); g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); } /* Flush IOTLB @@ -1151,9 +1361,49 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); } +static void vtd_switch_address_space(VTDAddressSpace *as) +{ + assert(as); + + trace_vtd_switch_address_space(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + as->iommu_state->dmar_enabled); + + /* Turn off first then on the other */ + if (as->iommu_state->dmar_enabled) { + memory_region_set_enabled(&as->sys_alias, false); + memory_region_set_enabled(&as->iommu, true); + } else { + memory_region_set_enabled(&as->iommu, false); + memory_region_set_enabled(&as->sys_alias, true); + } +} + +static void vtd_switch_address_space_all(IntelIOMMUState *s) +{ + GHashTableIter iter; + VTDBus *vtd_bus; + int i; + + g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { + for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) { + if (!vtd_bus->dev_as[i]) { + continue; + } + vtd_switch_address_space(vtd_bus->dev_as[i]); + } + } +} + /* Handle Translation Enable/Disable */ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) { + if (s->dmar_enabled == en) { + return; + } + VTD_DPRINTF(CSR, "Translation Enable %s", (en ? 
"on" : "off")); if (en) { @@ -1168,6 +1418,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en) /* Ok - report back to driver */ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0); } + + vtd_switch_address_space_all(s); } /* Handle Interrupt Remap Enable/Disable */ @@ -1457,7 +1709,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, entry.iova = addr; entry.perm = IOMMU_NONE; entry.translated_addr = 0; - memory_region_notify_iommu(entry.target_as->root, entry); + memory_region_notify_iommu(&vtd_dev_as->iommu, entry); done: return true; @@ -2005,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, IOMMUNotifierFlag new) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; + IntelIOMMUNotifierNode *node = NULL; + IntelIOMMUNotifierNode *next_node = NULL; - if (new & IOMMU_NOTIFIER_MAP) { - error_report("Device at bus %s addr %02x.%d requires iommu " - "notifier which is currently not supported by " - "intel-iommu emulation", - vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), - PCI_FUNC(vtd_as->devfn)); + if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) { + error_report("We need to set cache_mode=1 for intel-iommu to enable " + "device assignment with IOMMU protection."); exit(1); } + + if (old == IOMMU_NOTIFIER_NONE) { + node = g_malloc0(sizeof(*node)); + node->vtd_as = vtd_as; + QLIST_INSERT_HEAD(&s->notifiers_list, node, next); + return; + } + + /* update notifier node with new flags */ + QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) { + if (node->vtd_as == vtd_as) { + if (new == IOMMU_NOTIFIER_NONE) { + QLIST_REMOVE(node, next); + g_free(node); + } + return; + } + } } static const VMStateDescription vtd_vmstate = { @@ -2389,19 +2659,150 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) vtd_dev_as->devfn = (uint8_t)devfn; vtd_dev_as->iommu_state = s; 
vtd_dev_as->context_cache_entry.context_cache_gen = 0; + + /* + * Memory region relationships looks like (Address range shows + * only lower 32 bits to make it short in length...): + * + * |-----------------+-------------------+----------| + * | Name | Address range | Priority | + * |-----------------+-------------------+----------+ + * | vtd_root | 00000000-ffffffff | 0 | + * | intel_iommu | 00000000-ffffffff | 1 | + * | vtd_sys_alias | 00000000-ffffffff | 1 | + * | intel_iommu_ir | fee00000-feefffff | 64 | + * |-----------------+-------------------+----------| + * + * We enable/disable DMAR by switching enablement for + * vtd_sys_alias and intel_iommu regions. IR region is always + * enabled. + */ memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s), - &s->iommu_ops, "intel_iommu", UINT64_MAX); + &s->iommu_ops, "intel_iommu_dmar", + UINT64_MAX); + memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s), + "vtd_sys_alias", get_system_memory(), + 0, memory_region_size(get_system_memory())); memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s), &vtd_mem_ir_ops, s, "intel_iommu_ir", VTD_INTERRUPT_ADDR_SIZE); - memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST, - &vtd_dev_as->iommu_ir); - address_space_init(&vtd_dev_as->as, - &vtd_dev_as->iommu, name); + memory_region_init(&vtd_dev_as->root, OBJECT(s), + "vtd_root", UINT64_MAX); + memory_region_add_subregion_overlap(&vtd_dev_as->root, + VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir, 64); + address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name); + memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, + &vtd_dev_as->sys_alias, 1); + memory_region_add_subregion_overlap(&vtd_dev_as->root, 0, + &vtd_dev_as->iommu, 1); + vtd_switch_address_space(vtd_dev_as); } return vtd_dev_as; } +/* Unmap the whole range in the notifier's scope. 
*/ +static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) +{ + IOMMUTLBEntry entry; + hwaddr size; + hwaddr start = n->start; + hwaddr end = n->end; + + /* + * Note: all the codes in this function has a assumption that IOVA + * bits are no more than VTD_MGAW bits (which is restricted by + * VT-d spec), otherwise we need to consider overflow of 64 bits. + */ + + if (end > VTD_ADDRESS_SIZE) { + /* + * Don't need to unmap regions that is bigger than the whole + * VT-d supported address space size + */ + end = VTD_ADDRESS_SIZE; + } + + assert(start <= end); + size = end - start; + + if (ctpop64(size) != 1) { + /* + * This size cannot format a correct mask. Let's enlarge it to + * suite the minimum available mask. + */ + int n = 64 - clz64(size); + if (n > VTD_MGAW) { + /* should not happen, but in case it happens, limit it */ + n = VTD_MGAW; + } + size = 1ULL << n; + } + + entry.target_as = &address_space_memory; + /* Adjust iova for the size */ + entry.iova = n->start & ~(size - 1); + /* This field is meaningless for unmap */ + entry.translated_addr = 0; + entry.perm = IOMMU_NONE; + entry.addr_mask = size - 1; + + trace_vtd_as_unmap_whole(pci_bus_num(as->bus), + VTD_PCI_SLOT(as->devfn), + VTD_PCI_FUNC(as->devfn), + entry.iova, size); + + memory_region_notify_one(n, &entry); +} + +static void vtd_address_space_unmap_all(IntelIOMMUState *s) +{ + IntelIOMMUNotifierNode *node; + VTDAddressSpace *vtd_as; + IOMMUNotifier *n; + + QLIST_FOREACH(node, &s->notifiers_list, next) { + vtd_as = node->vtd_as; + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) { + vtd_address_space_unmap(vtd_as, n); + } + } +} + +static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private) +{ + memory_region_notify_one((IOMMUNotifier *)private, entry); + return 0; +} + +static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) +{ + VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu); + IntelIOMMUState *s = vtd_as->iommu_state; + uint8_t bus_n = 
pci_bus_num(vtd_as->bus); + VTDContextEntry ce; + + /* + * The replay can be triggered by either a invalidation or a newly + * created entry. No matter what, we release existing mappings + * (it means flushing caches for UNMAP-only registers). + */ + vtd_address_space_unmap(vtd_as, n); + + if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { + trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn), + VTD_CONTEXT_ENTRY_DID(ce.hi), + ce.hi, ce.lo); + vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); + } else { + trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + } + + return; +} + /* Do the initialization. It will also be called when reset, so pay * attention when adding new initialization stuff. */ @@ -2416,6 +2817,7 @@ static void vtd_init(IntelIOMMUState *s) s->iommu_ops.translate = vtd_iommu_translate; s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed; + s->iommu_ops.replay = vtd_iommu_replay; s->root = 0; s->root_extended = false; s->dmar_enabled = false; @@ -2511,6 +2913,11 @@ static void vtd_reset(DeviceState *dev) VTD_DPRINTF(GENERAL, ""); vtd_init(s); + + /* + * When device reset, throw away all mappings and external caches + */ + vtd_address_space_unmap_all(s); } static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) @@ -2574,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) return; } + QLIST_INIT(&s->notifiers_list); memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 41041219ba..29d67075f4 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -197,6 +197,7 @@ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) #define 
VTD_MGAW 39 /* Maximum Guest Address Width */ +#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW) #define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL #define VTD_CAP_MAMV (VTD_MAMV << 48) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index d24388e05f..f3b372a18f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1104,9 +1104,7 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) object_property_set_bool(cpu, true, "realized", &local_err); object_unref(cpu); - if (local_err) { - error_propagate(errp, local_err); - } + error_propagate(errp, local_err); } void pc_hot_add_cpu(const int64_t id, Error **errp) diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 88ad5e4c43..04a6980800 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -4,7 +4,6 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 # hw/i386/intel_iommu.c -vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64 vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16 @@ -30,6 +29,15 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32 vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32 vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)" vtd_fault_disabled(void) "Fault processing disabled for context entry" +vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device 
%02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64 +vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8 +vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64 +vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d" +vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read" +vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty" +vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set" +vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" +vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64 # hw/i386/amd_iommu.c amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 @@ -37,6 +45,7 @@ amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint amdvi_completion_wait_fail(uint64_t addr) "error: fail to write at address 0x%"PRIx64 amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64 amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64 +amdvi_mmio_read_invalid(int max, uint64_t addr, unsigned size) "error: addr outside region (max 0x%x): read addr 0x%" PRIx64 ", size %u" amdvi_command_error(uint64_t status) 
"error: Executing commands with command buffer disabled 0x%"PRIx64 amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: fail to access memory at 0x%"PRIx64" + 0x%"PRIx32 amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64 diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index b678ee9f20..0e42f0d02c 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -22,7 +22,6 @@ void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event) { VirtQueueElement *elem; - unsigned have, need; int i, len; if (!vinput->active) { @@ -32,10 +31,10 @@ void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event) /* queue up events ... */ if (vinput->qindex == vinput->qsize) { vinput->qsize++; - vinput->queue = realloc(vinput->queue, vinput->qsize * - sizeof(virtio_input_event)); + vinput->queue = g_realloc(vinput->queue, vinput->qsize * + sizeof(vinput->queue[0])); } - vinput->queue[vinput->qindex++] = *event; + vinput->queue[vinput->qindex++].event = *event; /* ... until we see a report sync ... */ if (event->type != cpu_to_le16(EV_SYN) || @@ -44,24 +43,24 @@ void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event) } /* ... then check available space ... */ - need = sizeof(virtio_input_event) * vinput->qindex; - virtqueue_get_avail_bytes(vinput->evt, &have, NULL, need, 0); - if (have < need) { - vinput->qindex = 0; - trace_virtio_input_queue_full(); - return; - } - - /* ... and finally pass them to the guest */ for (i = 0; i < vinput->qindex; i++) { elem = virtqueue_pop(vinput->evt, sizeof(VirtQueueElement)); if (!elem) { - /* should not happen, we've checked for space beforehand */ - fprintf(stderr, "%s: Huh? 
No vq elem available ...\n", __func__); + while (--i >= 0) { + virtqueue_unpop(vinput->evt, vinput->queue[i].elem, 0); + } + vinput->qindex = 0; + trace_virtio_input_queue_full(); return; } + vinput->queue[i].elem = elem; + } + + /* ... and finally pass them to the guest */ + for (i = 0; i < vinput->qindex; i++) { + elem = vinput->queue[i].elem; len = iov_from_buf(elem->in_sg, elem->in_num, - 0, vinput->queue+i, sizeof(virtio_input_event)); + 0, &vinput->queue[i].event, sizeof(virtio_input_event)); virtqueue_push(vinput->evt, elem, len); g_free(elem); } @@ -272,6 +271,8 @@ static void virtio_input_finalize(Object *obj) QTAILQ_REMOVE(&vinput->cfg_list, cfg, node); g_free(cfg); } + + g_free(vinput->queue); } static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) { diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 7a6e771ed1..c3829e31b5 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -387,25 +387,6 @@ static bool apic_common_sipi_needed(void *opaque) return s->wait_for_sipi != 0; } -static bool apic_irq_delivered_needed(void *opaque) -{ - APICCommonState *s = APIC_COMMON(opaque); - return s->cpu == X86_CPU(first_cpu) && apic_irq_delivered != 0; -} - -static void apic_irq_delivered_pre_save(void *opaque) -{ - APICCommonState *s = APIC_COMMON(opaque); - s->apic_irq_delivered = apic_irq_delivered; -} - -static int apic_irq_delivered_post_load(void *opaque, int version_id) -{ - APICCommonState *s = APIC_COMMON(opaque); - apic_irq_delivered = s->apic_irq_delivered; - return 0; -} - static const VMStateDescription vmstate_apic_common_sipi = { .name = "apic_sipi", .version_id = 1, @@ -418,19 +399,6 @@ static const VMStateDescription vmstate_apic_common_sipi = { } }; -static const VMStateDescription vmstate_apic_irq_delivered = { - .name = "apic_irq_delivered", - .version_id = 1, - .minimum_version_id = 1, - .needed = apic_irq_delivered_needed, - .pre_save = apic_irq_delivered_pre_save, - .post_load = 
apic_irq_delivered_post_load, - .fields = (VMStateField[]) { - VMSTATE_INT32(apic_irq_delivered, APICCommonState), - VMSTATE_END_OF_LIST() - } -}; - static const VMStateDescription vmstate_apic_common = { .name = "apic", .version_id = 3, @@ -465,7 +433,6 @@ static const VMStateDescription vmstate_apic_common = { }, .subsections = (const VMStateDescription*[]) { &vmstate_apic_common_sipi, - &vmstate_apic_irq_delivered, NULL } }; diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 81f0403117..19aab56072 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -614,12 +614,6 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) s = c->gic; cpu = ARM_CPU(c->cpu); - /* Initialize to actual HW supported configuration */ - kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, - KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), - &c->icc_ctlr_el1[GICV3_NS], false); - - c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; c->icc_pmr_el1 = 0; c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; c->icc_bpr[GICV3_G1] = GIC_MIN_BPR; @@ -628,6 +622,17 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) c->icc_sre_el1 = 0x7; memset(c->icc_apr, 0, sizeof(c->icc_apr)); memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen)); + + if (s->migration_blocker) { + return; + } + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), + &c->icc_ctlr_el1[GICV3_NS], false); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; } static void kvm_arm_gicv3_reset(DeviceState *dev) diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index bef4caf980..711c11454f 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -21,11 +21,14 @@ S390FLICState *s390_get_flic(void) { - S390FLICState *fs; + static S390FLICState *fs; - fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL)); if (!fs) { - fs = 
S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC, NULL)); + fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL)); + if (!fs) { + fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC, + NULL)); + } } return fs; } diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c index 1c69cb33f8..2fcc3d2e7c 100644 --- a/hw/ipmi/isa_ipmi_bt.c +++ b/hw/ipmi/isa_ipmi_bt.c @@ -37,40 +37,30 @@ #define IPMI_BT_HBUSY_BIT 6 #define IPMI_BT_BBUSY_BIT 7 -#define IPMI_BT_CLR_WR_MASK (1 << IPMI_BT_CLR_WR_BIT) #define IPMI_BT_GET_CLR_WR(d) (((d) >> IPMI_BT_CLR_WR_BIT) & 0x1) -#define IPMI_BT_SET_CLR_WR(d, v) (d) = (((d) & ~IPMI_BT_CLR_WR_MASK) | \ - (((v & 1) << IPMI_BT_CLR_WR_BIT))) -#define IPMI_BT_CLR_RD_MASK (1 << IPMI_BT_CLR_RD_BIT) #define IPMI_BT_GET_CLR_RD(d) (((d) >> IPMI_BT_CLR_RD_BIT) & 0x1) -#define IPMI_BT_SET_CLR_RD(d, v) (d) = (((d) & ~IPMI_BT_CLR_RD_MASK) | \ - (((v & 1) << IPMI_BT_CLR_RD_BIT))) -#define IPMI_BT_H2B_ATN_MASK (1 << IPMI_BT_H2B_ATN_BIT) #define IPMI_BT_GET_H2B_ATN(d) (((d) >> IPMI_BT_H2B_ATN_BIT) & 0x1) -#define IPMI_BT_SET_H2B_ATN(d, v) (d) = (((d) & ~IPMI_BT_H2B_ATN_MASK) | \ - (((v & 1) << IPMI_BT_H2B_ATN_BIT))) #define IPMI_BT_B2H_ATN_MASK (1 << IPMI_BT_B2H_ATN_BIT) #define IPMI_BT_GET_B2H_ATN(d) (((d) >> IPMI_BT_B2H_ATN_BIT) & 0x1) -#define IPMI_BT_SET_B2H_ATN(d, v) (d) = (((d) & ~IPMI_BT_B2H_ATN_MASK) | \ - (((v & 1) << IPMI_BT_B2H_ATN_BIT))) +#define IPMI_BT_SET_B2H_ATN(d, v) ((d) = (((d) & ~IPMI_BT_B2H_ATN_MASK) | \ + (((v) & 1) << IPMI_BT_B2H_ATN_BIT))) #define IPMI_BT_SMS_ATN_MASK (1 << IPMI_BT_SMS_ATN_BIT) #define IPMI_BT_GET_SMS_ATN(d) (((d) >> IPMI_BT_SMS_ATN_BIT) & 0x1) -#define IPMI_BT_SET_SMS_ATN(d, v) (d) = (((d) & ~IPMI_BT_SMS_ATN_MASK) | \ - (((v & 1) << IPMI_BT_SMS_ATN_BIT))) +#define IPMI_BT_SET_SMS_ATN(d, v) ((d) = (((d) & ~IPMI_BT_SMS_ATN_MASK) | \ + (((v) & 1) << IPMI_BT_SMS_ATN_BIT))) #define IPMI_BT_HBUSY_MASK (1 << IPMI_BT_HBUSY_BIT) #define IPMI_BT_GET_HBUSY(d) (((d) >> IPMI_BT_HBUSY_BIT) & 0x1) 
-#define IPMI_BT_SET_HBUSY(d, v) (d) = (((d) & ~IPMI_BT_HBUSY_MASK) | \ - (((v & 1) << IPMI_BT_HBUSY_BIT))) +#define IPMI_BT_SET_HBUSY(d, v) ((d) = (((d) & ~IPMI_BT_HBUSY_MASK) | \ + (((v) & 1) << IPMI_BT_HBUSY_BIT))) #define IPMI_BT_BBUSY_MASK (1 << IPMI_BT_BBUSY_BIT) -#define IPMI_BT_GET_BBUSY(d) (((d) >> IPMI_BT_BBUSY_BIT) & 0x1) -#define IPMI_BT_SET_BBUSY(d, v) (d) = (((d) & ~IPMI_BT_BBUSY_MASK) | \ - (((v & 1) << IPMI_BT_BBUSY_BIT))) +#define IPMI_BT_SET_BBUSY(d, v) ((d) = (((d) & ~IPMI_BT_BBUSY_MASK) | \ + (((v) & 1) << IPMI_BT_BBUSY_BIT))) /* Mask register */ @@ -79,13 +69,13 @@ #define IPMI_BT_B2H_IRQ_EN_MASK (1 << IPMI_BT_B2H_IRQ_EN_BIT) #define IPMI_BT_GET_B2H_IRQ_EN(d) (((d) >> IPMI_BT_B2H_IRQ_EN_BIT) & 0x1) -#define IPMI_BT_SET_B2H_IRQ_EN(d, v) (d) = (((d) & ~IPMI_BT_B2H_IRQ_EN_MASK) | \ - (((v & 1) << IPMI_BT_B2H_IRQ_EN_BIT))) +#define IPMI_BT_SET_B2H_IRQ_EN(d, v) ((d) = (((d) & ~IPMI_BT_B2H_IRQ_EN_MASK) |\ + (((v) & 1) << IPMI_BT_B2H_IRQ_EN_BIT))) #define IPMI_BT_B2H_IRQ_MASK (1 << IPMI_BT_B2H_IRQ_BIT) #define IPMI_BT_GET_B2H_IRQ(d) (((d) >> IPMI_BT_B2H_IRQ_BIT) & 0x1) -#define IPMI_BT_SET_B2H_IRQ(d, v) (d) = (((d) & ~IPMI_BT_B2H_IRQ_MASK) | \ - (((v & 1) << IPMI_BT_B2H_IRQ_BIT))) +#define IPMI_BT_SET_B2H_IRQ(d, v) ((d) = (((d) & ~IPMI_BT_B2H_IRQ_MASK) | \ + (((v) & 1) << IPMI_BT_B2H_IRQ_BIT))) typedef struct IPMIBT { IPMIBmc *bmc; diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 59930dd9d0..a0866c3856 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -312,11 +312,6 @@ void ich9_generate_smi(void) cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI); } -void ich9_generate_nmi(void) -{ - cpu_interrupt(first_cpu, CPU_INTERRUPT_NMI); -} - static int ich9_lpc_sci_irq(ICH9LPCState *lpc) { switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] & diff --git a/hw/misc/exynos4210_pmu.c b/hw/misc/exynos4210_pmu.c index e30dbc7d3d..63a8ccd355 100644 --- a/hw/misc/exynos4210_pmu.c +++ b/hw/misc/exynos4210_pmu.c @@ -401,8 +401,8 @@ static uint64_t 
exynos4210_pmu_read(void *opaque, hwaddr offset, unsigned size) { Exynos4210PmuState *s = (Exynos4210PmuState *)opaque; - unsigned i; const Exynos4210PmuReg *reg_p = exynos4210_pmu_regs; + unsigned int i; for (i = 0; i < PMU_NUM_OF_REGISTERS; i++) { if (reg_p->offset == offset) { @@ -420,8 +420,8 @@ static void exynos4210_pmu_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) { Exynos4210PmuState *s = (Exynos4210PmuState *)opaque; - unsigned i; const Exynos4210PmuReg *reg_p = exynos4210_pmu_regs; + unsigned int i; for (i = 0; i < PMU_NUM_OF_REGISTERS; i++) { if (reg_p->offset == offset) { diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index 6a95d92d37..5ddaffe63a 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -26,6 +26,7 @@ common-obj-$(CONFIG_IMX_FEC) += imx_fec.o common-obj-$(CONFIG_CADENCE) += cadence_gem.o common-obj-$(CONFIG_STELLARIS_ENET) += stellaris_enet.o common-obj-$(CONFIG_LANCE) += lance.o +common-obj-$(CONFIG_FTGMAC100) += ftgmac100.o obj-$(CONFIG_ETRAXFS) += etraxfs_eth.o obj-$(CONFIG_COLDFIRE) += mcf_fec.o diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index d4de8ad9f1..3943187572 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -300,6 +300,8 @@ #define DESC_1_RX_SOF 0x00004000 #define DESC_1_RX_EOF 0x00008000 +#define GEM_MODID_VALUE 0x00020118 + static inline unsigned tx_desc_get_buffer(unsigned *desc) { return desc[0]; @@ -481,14 +483,17 @@ static int gem_can_receive(NetClientState *nc) } for (i = 0; i < s->num_priority_queues; i++) { - if (rx_desc_get_ownership(s->rx_desc[i]) == 1) { - if (s->can_rx_state != 2) { - s->can_rx_state = 2; - DB_PRINT("can't receive - busy buffer descriptor (q%d) 0x%x\n", - i, s->rx_desc_addr[i]); - } - return 0; + if (rx_desc_get_ownership(s->rx_desc[i]) != 1) { + break; + } + }; + + if (i == s->num_priority_queues) { + if (s->can_rx_state != 2) { + s->can_rx_state = 2; + DB_PRINT("can't receive - all the buffer descriptors are busy\n"); } + return 0; 
} if (s->can_rx_state != 0) { @@ -506,7 +511,18 @@ static void gem_update_int_status(CadenceGEMState *s) { int i; - if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) { + if (!s->regs[GEM_ISR]) { + /* ISR isn't set, clear all the interrupts */ + for (i = 0; i < s->num_priority_queues; ++i) { + qemu_set_irq(s->irq[i], 0); + } + return; + } + + /* If we get here we know s->regs[GEM_ISR] is set, so we don't need to + * check it again. + */ + if (s->num_priority_queues == 1) { /* No priority queues, just trigger the interrupt */ DB_PRINT("asserting int.\n"); qemu_set_irq(s->irq[0], 1); @@ -790,8 +806,8 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q) { DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr[q]); /* read current descriptor */ - cpu_physical_memory_read(s->rx_desc_addr[0], - (uint8_t *)s->rx_desc[0], sizeof(s->rx_desc[0])); + cpu_physical_memory_read(s->rx_desc_addr[q], + (uint8_t *)s->rx_desc[q], sizeof(s->rx_desc[q])); /* Descriptor owned by software ? */ if (rx_desc_get_ownership(s->rx_desc[q]) == 1) { @@ -1209,7 +1225,7 @@ static void gem_reset(DeviceState *d) s->regs[GEM_TXPAUSE] = 0x0000ffff; s->regs[GEM_TXPARTIALSF] = 0x000003ff; s->regs[GEM_RXPARTIALSF] = 0x000003ff; - s->regs[GEM_MODID] = 0x00020118; + s->regs[GEM_MODID] = s->revision; s->regs[GEM_DESCONF] = 0x02500111; s->regs[GEM_DESCONF2] = 0x2ab13fff; s->regs[GEM_DESCONF5] = 0x002f2145; @@ -1271,7 +1287,6 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) { CadenceGEMState *s; uint32_t retval; - int i; s = (CadenceGEMState *)opaque; offset >>= 2; @@ -1282,9 +1297,7 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) switch (offset) { case GEM_ISR: DB_PRINT("lowering irqs on ISR read\n"); - for (i = 0; i < s->num_priority_queues; ++i) { - qemu_set_irq(s->irq[i], 0); - } + /* The interrupts get updated at the end of the function. 
*/ break; case GEM_PHYMNTNC: if (retval & GEM_PHYMNTNC_OP_R) { @@ -1508,6 +1521,8 @@ static const VMStateDescription vmstate_cadence_gem = { static Property gem_properties[] = { DEFINE_NIC_PROPERTIES(CadenceGEMState, conf), + DEFINE_PROP_UINT32("revision", CadenceGEMState, revision, + GEM_MODID_VALUE), DEFINE_PROP_UINT8("num-priority-queues", CadenceGEMState, num_priority_queues, 1), DEFINE_PROP_UINT8("num-type1-screeners", CadenceGEMState, diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 93249497f4..f2e5072d27 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -40,7 +40,7 @@ static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; -#define E1000_DEBUG +/* #define E1000_DEBUG */ #ifdef E1000_DEBUG enum { diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c new file mode 100644 index 0000000000..3c36ab9cec --- /dev/null +++ b/hw/net/ftgmac100.c @@ -0,0 +1,1016 @@ +/* + * Faraday FTGMAC100 Gigabit Ethernet + * + * Copyright (C) 2016-2017, IBM Corporation. + * + * Based on Coldfire Fast Ethernet Controller emulation. + * + * Copyright (c) 2007 CodeSourcery. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/net/ftgmac100.h" +#include "sysemu/dma.h" +#include "qemu/log.h" +#include "net/checksum.h" +#include "net/eth.h" +#include "hw/net/mii.h" + +/* For crc32 */ +#include <zlib.h> + +/* + * FTGMAC100 registers + */ +#define FTGMAC100_ISR 0x00 +#define FTGMAC100_IER 0x04 +#define FTGMAC100_MAC_MADR 0x08 +#define FTGMAC100_MAC_LADR 0x0c +#define FTGMAC100_MATH0 0x10 +#define FTGMAC100_MATH1 0x14 +#define FTGMAC100_NPTXPD 0x18 +#define FTGMAC100_RXPD 0x1C +#define FTGMAC100_NPTXR_BADR 0x20 +#define FTGMAC100_RXR_BADR 0x24 +#define FTGMAC100_HPTXPD 0x28 +#define FTGMAC100_HPTXR_BADR 0x2c +#define FTGMAC100_ITC 0x30 +#define FTGMAC100_APTC 0x34 +#define FTGMAC100_DBLAC 0x38 +#define FTGMAC100_REVR 0x40 +#define FTGMAC100_FEAR1 0x44 +#define FTGMAC100_RBSR 0x4c +#define FTGMAC100_TPAFCR 0x48 + +#define FTGMAC100_MACCR 0x50 +#define FTGMAC100_MACSR 0x54 +#define FTGMAC100_PHYCR 0x60 +#define FTGMAC100_PHYDATA 0x64 +#define FTGMAC100_FCR 0x68 + +/* + * Interrupt status register & interrupt enable register + */ +#define FTGMAC100_INT_RPKT_BUF (1 << 0) +#define FTGMAC100_INT_RPKT_FIFO (1 << 1) +#define FTGMAC100_INT_NO_RXBUF (1 << 2) +#define FTGMAC100_INT_RPKT_LOST (1 << 3) +#define FTGMAC100_INT_XPKT_ETH (1 << 4) +#define FTGMAC100_INT_XPKT_FIFO (1 << 5) +#define FTGMAC100_INT_NO_NPTXBUF (1 << 6) +#define FTGMAC100_INT_XPKT_LOST (1 << 7) +#define FTGMAC100_INT_AHB_ERR (1 << 8) +#define FTGMAC100_INT_PHYSTS_CHG (1 << 9) +#define FTGMAC100_INT_NO_HPTXBUF (1 << 10) + +/* + * Automatic polling timer control register + */ +#define FTGMAC100_APTC_RXPOLL_CNT(x) ((x) & 0xf) +#define FTGMAC100_APTC_RXPOLL_TIME_SEL (1 << 4) +#define FTGMAC100_APTC_TXPOLL_CNT(x) (((x) >> 8) & 0xf) +#define FTGMAC100_APTC_TXPOLL_TIME_SEL (1 << 12) + +/* + * PHY control register + */ +#define FTGMAC100_PHYCR_MIIRD (1 << 26) +#define FTGMAC100_PHYCR_MIIWR (1 << 27) + +#define FTGMAC100_PHYCR_DEV(x) (((x) >> 16) & 0x1f) +#define FTGMAC100_PHYCR_REG(x) (((x) >> 
21) & 0x1f) + +/* + * PHY data register + */ +#define FTGMAC100_PHYDATA_MIIWDATA(x) ((x) & 0xffff) +#define FTGMAC100_PHYDATA_MIIRDATA(x) (((x) >> 16) & 0xffff) + +/* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE (1 << 31) + +/* + * MAC control register + */ +#define FTGMAC100_MACCR_TXDMA_EN (1 << 0) +#define FTGMAC100_MACCR_RXDMA_EN (1 << 1) +#define FTGMAC100_MACCR_TXMAC_EN (1 << 2) +#define FTGMAC100_MACCR_RXMAC_EN (1 << 3) +#define FTGMAC100_MACCR_RM_VLAN (1 << 4) +#define FTGMAC100_MACCR_HPTXR_EN (1 << 5) +#define FTGMAC100_MACCR_LOOP_EN (1 << 6) +#define FTGMAC100_MACCR_ENRX_IN_HALFTX (1 << 7) +#define FTGMAC100_MACCR_FULLDUP (1 << 8) +#define FTGMAC100_MACCR_GIGA_MODE (1 << 9) +#define FTGMAC100_MACCR_CRC_APD (1 << 10) /* not needed */ +#define FTGMAC100_MACCR_RX_RUNT (1 << 12) +#define FTGMAC100_MACCR_JUMBO_LF (1 << 13) +#define FTGMAC100_MACCR_RX_ALL (1 << 14) +#define FTGMAC100_MACCR_HT_MULTI_EN (1 << 15) +#define FTGMAC100_MACCR_RX_MULTIPKT (1 << 16) +#define FTGMAC100_MACCR_RX_BROADPKT (1 << 17) +#define FTGMAC100_MACCR_DISCARD_CRCERR (1 << 18) +#define FTGMAC100_MACCR_FAST_MODE (1 << 19) +#define FTGMAC100_MACCR_SW_RST (1 << 31) + +/* + * Transmit descriptor + */ +#define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) +#define FTGMAC100_TXDES0_EDOTR (1 << 15) +#define FTGMAC100_TXDES0_CRC_ERR (1 << 19) +#define FTGMAC100_TXDES0_LTS (1 << 28) +#define FTGMAC100_TXDES0_FTS (1 << 29) +#define FTGMAC100_TXDES0_EDOTR_ASPEED (1 << 30) +#define FTGMAC100_TXDES0_TXDMA_OWN (1 << 31) + +#define FTGMAC100_TXDES1_VLANTAG_CI(x) ((x) & 0xffff) +#define FTGMAC100_TXDES1_INS_VLANTAG (1 << 16) +#define FTGMAC100_TXDES1_TCP_CHKSUM (1 << 17) +#define FTGMAC100_TXDES1_UDP_CHKSUM (1 << 18) +#define FTGMAC100_TXDES1_IP_CHKSUM (1 << 19) +#define FTGMAC100_TXDES1_LLC (1 << 22) +#define FTGMAC100_TXDES1_TX2FIC (1 << 30) +#define FTGMAC100_TXDES1_TXIC (1 << 31) + +/* + * Receive descriptor + */ +#define FTGMAC100_RXDES0_VDBC 0x3fff +#define 
FTGMAC100_RXDES0_EDORR (1 << 15) +#define FTGMAC100_RXDES0_MULTICAST (1 << 16) +#define FTGMAC100_RXDES0_BROADCAST (1 << 17) +#define FTGMAC100_RXDES0_RX_ERR (1 << 18) +#define FTGMAC100_RXDES0_CRC_ERR (1 << 19) +#define FTGMAC100_RXDES0_FTL (1 << 20) +#define FTGMAC100_RXDES0_RUNT (1 << 21) +#define FTGMAC100_RXDES0_RX_ODD_NB (1 << 22) +#define FTGMAC100_RXDES0_FIFO_FULL (1 << 23) +#define FTGMAC100_RXDES0_PAUSE_OPCODE (1 << 24) +#define FTGMAC100_RXDES0_PAUSE_FRAME (1 << 25) +#define FTGMAC100_RXDES0_LRS (1 << 28) +#define FTGMAC100_RXDES0_FRS (1 << 29) +#define FTGMAC100_RXDES0_EDORR_ASPEED (1 << 30) +#define FTGMAC100_RXDES0_RXPKT_RDY (1 << 31) + +#define FTGMAC100_RXDES1_VLANTAG_CI 0xffff +#define FTGMAC100_RXDES1_PROT_MASK (0x3 << 20) +#define FTGMAC100_RXDES1_PROT_NONIP (0x0 << 20) +#define FTGMAC100_RXDES1_PROT_IP (0x1 << 20) +#define FTGMAC100_RXDES1_PROT_TCPIP (0x2 << 20) +#define FTGMAC100_RXDES1_PROT_UDPIP (0x3 << 20) +#define FTGMAC100_RXDES1_LLC (1 << 22) +#define FTGMAC100_RXDES1_DF (1 << 23) +#define FTGMAC100_RXDES1_VLANTAG_AVAIL (1 << 24) +#define FTGMAC100_RXDES1_TCP_CHKSUM_ERR (1 << 25) +#define FTGMAC100_RXDES1_UDP_CHKSUM_ERR (1 << 26) +#define FTGMAC100_RXDES1_IP_CHKSUM_ERR (1 << 27) + +/* + * Receive and transmit Buffer Descriptor + */ +typedef struct { + uint32_t des0; + uint32_t des1; + uint32_t des2; /* not used by HW */ + uint32_t des3; +} FTGMAC100Desc; + +/* + * Specific RTL8211E MII Registers + */ +#define RTL8211E_MII_PHYCR 16 /* PHY Specific Control */ +#define RTL8211E_MII_PHYSR 17 /* PHY Specific Status */ +#define RTL8211E_MII_INER 18 /* Interrupt Enable */ +#define RTL8211E_MII_INSR 19 /* Interrupt Status */ +#define RTL8211E_MII_RXERC 24 /* Receive Error Counter */ +#define RTL8211E_MII_LDPSR 27 /* Link Down Power Saving */ +#define RTL8211E_MII_EPAGSR 30 /* Extension Page Select */ +#define RTL8211E_MII_PAGSEL 31 /* Page Select */ + +/* + * RTL8211E Interrupt Status + */ +#define PHY_INT_AUTONEG_ERROR (1 << 15) +#define 
PHY_INT_PAGE_RECV (1 << 12) +#define PHY_INT_AUTONEG_COMPLETE (1 << 11) +#define PHY_INT_LINK_STATUS (1 << 10) +#define PHY_INT_ERROR (1 << 9) +#define PHY_INT_DOWN (1 << 8) +#define PHY_INT_JABBER (1 << 0) + +/* + * Max frame size for the receiving buffer + */ +#define FTGMAC100_MAX_FRAME_SIZE 10240 + +/* Limits depending on the type of the frame + * + * 9216 for Jumbo frames (+ 4 for VLAN) + * 1518 for other frames (+ 4 for VLAN) + */ +static int ftgmac100_max_frame_size(FTGMAC100State *s) +{ + return (s->maccr & FTGMAC100_MACCR_JUMBO_LF ? 9216 : 1518) + 4; +} + +static void ftgmac100_update_irq(FTGMAC100State *s) +{ + qemu_set_irq(s->irq, s->isr & s->ier); +} + +/* + * The MII phy could raise a GPIO to the processor which in turn + * could be handled as an interrupt by the OS. + * For now we don't handle any GPIO/interrupt line, so the OS will + * have to poll for the PHY status. + */ +static void phy_update_irq(FTGMAC100State *s) +{ + ftgmac100_update_irq(s); +} + +static void phy_update_link(FTGMAC100State *s) +{ + /* Autonegotiation status mirrors link status. 
*/ + if (qemu_get_queue(s->nic)->link_down) { + s->phy_status &= ~(MII_BMSR_LINK_ST | MII_BMSR_AN_COMP); + s->phy_int |= PHY_INT_DOWN; + } else { + s->phy_status |= (MII_BMSR_LINK_ST | MII_BMSR_AN_COMP); + s->phy_int |= PHY_INT_AUTONEG_COMPLETE; + } + phy_update_irq(s); +} + +static void ftgmac100_set_link(NetClientState *nc) +{ + phy_update_link(FTGMAC100(qemu_get_nic_opaque(nc))); +} + +static void phy_reset(FTGMAC100State *s) +{ + s->phy_status = (MII_BMSR_100TX_FD | MII_BMSR_100TX_HD | MII_BMSR_10T_FD | + MII_BMSR_10T_HD | MII_BMSR_EXTSTAT | MII_BMSR_MFPS | + MII_BMSR_AN_COMP | MII_BMSR_AUTONEG | MII_BMSR_LINK_ST | + MII_BMSR_EXTCAP); + s->phy_control = (MII_BMCR_AUTOEN | MII_BMCR_FD | MII_BMCR_SPEED1000); + s->phy_advertise = (MII_ANAR_PAUSE_ASYM | MII_ANAR_PAUSE | MII_ANAR_TXFD | + MII_ANAR_TX | MII_ANAR_10FD | MII_ANAR_10 | + MII_ANAR_CSMACD); + s->phy_int_mask = 0; + s->phy_int = 0; +} + +static uint32_t do_phy_read(FTGMAC100State *s, int reg) +{ + uint32_t val; + + switch (reg) { + case MII_BMCR: /* Basic Control */ + val = s->phy_control; + break; + case MII_BMSR: /* Basic Status */ + val = s->phy_status; + break; + case MII_PHYID1: /* ID1 */ + val = RTL8211E_PHYID1; + break; + case MII_PHYID2: /* ID2 */ + val = RTL8211E_PHYID2; + break; + case MII_ANAR: /* Auto-neg advertisement */ + val = s->phy_advertise; + break; + case MII_ANLPAR: /* Auto-neg Link Partner Ability */ + val = (MII_ANLPAR_ACK | MII_ANLPAR_PAUSE | MII_ANLPAR_TXFD | + MII_ANLPAR_TX | MII_ANLPAR_10FD | MII_ANLPAR_10 | + MII_ANLPAR_CSMACD); + break; + case MII_ANER: /* Auto-neg Expansion */ + val = MII_ANER_NWAY; + break; + case MII_CTRL1000: /* 1000BASE-T control */ + val = (MII_CTRL1000_HALF | MII_CTRL1000_FULL); + break; + case MII_STAT1000: /* 1000BASE-T status */ + val = MII_STAT1000_FULL; + break; + case RTL8211E_MII_INSR: /* Interrupt status. 
*/ + val = s->phy_int; + s->phy_int = 0; + phy_update_irq(s); + break; + case RTL8211E_MII_INER: /* Interrupt enable */ + val = s->phy_int_mask; + break; + case RTL8211E_MII_PHYCR: + case RTL8211E_MII_PHYSR: + case RTL8211E_MII_RXERC: + case RTL8211E_MII_LDPSR: + case RTL8211E_MII_EPAGSR: + case RTL8211E_MII_PAGSEL: + qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n", + __func__, reg); + val = 0; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n", + __func__, reg); + val = 0; + break; + } + + return val; +} + +#define MII_BMCR_MASK (MII_BMCR_LOOPBACK | MII_BMCR_SPEED100 | \ + MII_BMCR_SPEED | MII_BMCR_AUTOEN | MII_BMCR_PDOWN | \ + MII_BMCR_FD | MII_BMCR_CTST) +#define MII_ANAR_MASK 0x2d7f + +static void do_phy_write(FTGMAC100State *s, int reg, uint32_t val) +{ + switch (reg) { + case MII_BMCR: /* Basic Control */ + if (val & MII_BMCR_RESET) { + phy_reset(s); + } else { + s->phy_control = val & MII_BMCR_MASK; + /* Complete autonegotiation immediately. 
*/ + if (val & MII_BMCR_AUTOEN) { + s->phy_status |= MII_BMSR_AN_COMP; + } + } + break; + case MII_ANAR: /* Auto-neg advertisement */ + s->phy_advertise = (val & MII_ANAR_MASK) | MII_ANAR_TX; + break; + case RTL8211E_MII_INER: /* Interrupt enable */ + s->phy_int_mask = val & 0xff; + phy_update_irq(s); + break; + case RTL8211E_MII_PHYCR: + case RTL8211E_MII_PHYSR: + case RTL8211E_MII_RXERC: + case RTL8211E_MII_LDPSR: + case RTL8211E_MII_EPAGSR: + case RTL8211E_MII_PAGSEL: + qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n", + __func__, reg); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n", + __func__, reg); + break; + } +} + +static int ftgmac100_read_bd(FTGMAC100Desc *bd, dma_addr_t addr) +{ + if (dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read descriptor @ 0x%" + HWADDR_PRIx "\n", __func__, addr); + return -1; + } + bd->des0 = le32_to_cpu(bd->des0); + bd->des1 = le32_to_cpu(bd->des1); + bd->des2 = le32_to_cpu(bd->des2); + bd->des3 = le32_to_cpu(bd->des3); + return 0; +} + +static int ftgmac100_write_bd(FTGMAC100Desc *bd, dma_addr_t addr) +{ + FTGMAC100Desc lebd; + + lebd.des0 = cpu_to_le32(bd->des0); + lebd.des1 = cpu_to_le32(bd->des1); + lebd.des2 = cpu_to_le32(bd->des2); + lebd.des3 = cpu_to_le32(bd->des3); + if (dma_memory_write(&address_space_memory, addr, &lebd, sizeof(lebd))) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to write descriptor @ 0x%" + HWADDR_PRIx "\n", __func__, addr); + return -1; + } + return 0; +} + +static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring, + uint32_t tx_descriptor) +{ + int frame_size = 0; + uint8_t *ptr = s->frame; + uint32_t addr = tx_descriptor; + uint32_t flags = 0; + int max_frame_size = ftgmac100_max_frame_size(s); + + while (1) { + FTGMAC100Desc bd; + int len; + + if (ftgmac100_read_bd(&bd, addr) || + ((bd.des0 & FTGMAC100_TXDES0_TXDMA_OWN) == 0)) { + /* Run out of descriptors to transmit. 
*/ + s->isr |= FTGMAC100_INT_NO_NPTXBUF; + break; + } + + /* record transmit flags as they are valid only on the first + * segment */ + if (bd.des0 & FTGMAC100_TXDES0_FTS) { + flags = bd.des1; + } + + len = bd.des0 & 0x3FFF; + if (frame_size + len > max_frame_size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: frame too big : %d bytes\n", + __func__, len); + len = max_frame_size - frame_size; + } + + if (dma_memory_read(&address_space_memory, bd.des3, ptr, len)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read packet @ 0x%x\n", + __func__, bd.des3); + s->isr |= FTGMAC100_INT_NO_NPTXBUF; + break; + } + + ptr += len; + frame_size += len; + if (bd.des0 & FTGMAC100_TXDES0_LTS) { + if (flags & FTGMAC100_TXDES1_IP_CHKSUM) { + net_checksum_calculate(s->frame, frame_size); + } + /* Last buffer in frame. */ + qemu_send_packet(qemu_get_queue(s->nic), s->frame, frame_size); + ptr = s->frame; + frame_size = 0; + if (flags & FTGMAC100_TXDES1_TXIC) { + s->isr |= FTGMAC100_INT_XPKT_ETH; + } + } + + if (flags & FTGMAC100_TXDES1_TX2FIC) { + s->isr |= FTGMAC100_INT_XPKT_FIFO; + } + bd.des0 &= ~FTGMAC100_TXDES0_TXDMA_OWN; + + /* Write back the modified descriptor. */ + ftgmac100_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if (bd.des0 & s->txdes0_edotr) { + addr = tx_ring; + } else { + addr += sizeof(FTGMAC100Desc); + } + } + + s->tx_descriptor = addr; + + ftgmac100_update_irq(s); +} + +static int ftgmac100_can_receive(NetClientState *nc) +{ + FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc)); + FTGMAC100Desc bd; + + if ((s->maccr & (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) + != (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) { + return 0; + } + + if (ftgmac100_read_bd(&bd, s->rx_descriptor)) { + return 0; + } + return !(bd.des0 & FTGMAC100_RXDES0_RXPKT_RDY); +} + +/* + * This is purely informative. The HW can poll the RW (and RX) ring + * buffers for available descriptors but we don't need to trigger a + * timer for that in qemu. 
+ */ +static uint32_t ftgmac100_rxpoll(FTGMAC100State *s) +{ + /* Polling times : + * + * Speed TIME_SEL=0 TIME_SEL=1 + * + * 10 51.2 ms 819.2 ms + * 100 5.12 ms 81.92 ms + * 1000 1.024 ms 16.384 ms + */ + static const int div[] = { 20, 200, 1000 }; + + uint32_t cnt = 1024 * FTGMAC100_APTC_RXPOLL_CNT(s->aptcr); + uint32_t speed = (s->maccr & FTGMAC100_MACCR_FAST_MODE) ? 1 : 0; + uint32_t period; + + if (s->aptcr & FTGMAC100_APTC_RXPOLL_TIME_SEL) { + cnt <<= 4; + } + + if (s->maccr & FTGMAC100_MACCR_GIGA_MODE) { + speed = 2; + } + + period = cnt / div[speed]; + + return period; +} + +static void ftgmac100_reset(DeviceState *d) +{ + FTGMAC100State *s = FTGMAC100(d); + + /* Reset the FTGMAC100 */ + s->isr = 0; + s->ier = 0; + s->rx_enabled = 0; + s->rx_ring = 0; + s->rbsr = 0x640; + s->rx_descriptor = 0; + s->tx_ring = 0; + s->tx_descriptor = 0; + s->math[0] = 0; + s->math[1] = 0; + s->itc = 0; + s->aptcr = 1; + s->dblac = 0x00022f00; + s->revr = 0; + s->fear1 = 0; + s->tpafcr = 0xf1; + + s->maccr = 0; + s->phycr = 0; + s->phydata = 0; + s->fcr = 0x400; + + /* and the PHY */ + phy_reset(s); +} + +static uint64_t ftgmac100_read(void *opaque, hwaddr addr, unsigned size) +{ + FTGMAC100State *s = FTGMAC100(opaque); + + switch (addr & 0xff) { + case FTGMAC100_ISR: + return s->isr; + case FTGMAC100_IER: + return s->ier; + case FTGMAC100_MAC_MADR: + return (s->conf.macaddr.a[0] << 8) | s->conf.macaddr.a[1]; + case FTGMAC100_MAC_LADR: + return ((uint32_t) s->conf.macaddr.a[2] << 24) | + (s->conf.macaddr.a[3] << 16) | (s->conf.macaddr.a[4] << 8) | + s->conf.macaddr.a[5]; + case FTGMAC100_MATH0: + return s->math[0]; + case FTGMAC100_MATH1: + return s->math[1]; + case FTGMAC100_ITC: + return s->itc; + case FTGMAC100_DBLAC: + return s->dblac; + case FTGMAC100_REVR: + return s->revr; + case FTGMAC100_FEAR1: + return s->fear1; + case FTGMAC100_TPAFCR: + return s->tpafcr; + case FTGMAC100_FCR: + return s->fcr; + case FTGMAC100_MACCR: + return s->maccr; + case FTGMAC100_PHYCR: + 
return s->phycr; + case FTGMAC100_PHYDATA: + return s->phydata; + + /* We might want to support these one day */ + case FTGMAC100_HPTXPD: /* High Priority Transmit Poll Demand */ + case FTGMAC100_HPTXR_BADR: /* High Priority Transmit Ring Base Address */ + case FTGMAC100_MACSR: /* MAC Status Register (MACSR) */ + qemu_log_mask(LOG_UNIMP, "%s: read to unimplemented register 0x%" + HWADDR_PRIx "\n", __func__, addr); + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + return 0; + } +} + +static void ftgmac100_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + FTGMAC100State *s = FTGMAC100(opaque); + int reg; + + switch (addr & 0xff) { + case FTGMAC100_ISR: /* Interrupt status */ + s->isr &= ~value; + break; + case FTGMAC100_IER: /* Interrupt control */ + s->ier = value; + break; + case FTGMAC100_MAC_MADR: /* MAC */ + s->conf.macaddr.a[0] = value >> 8; + s->conf.macaddr.a[1] = value; + break; + case FTGMAC100_MAC_LADR: + s->conf.macaddr.a[2] = value >> 24; + s->conf.macaddr.a[3] = value >> 16; + s->conf.macaddr.a[4] = value >> 8; + s->conf.macaddr.a[5] = value; + break; + case FTGMAC100_MATH0: /* Multicast Address Hash Table 0 */ + s->math[0] = value; + break; + case FTGMAC100_MATH1: /* Multicast Address Hash Table 1 */ + s->math[1] = value; + break; + case FTGMAC100_ITC: /* TODO: Interrupt Timer Control */ + s->itc = value; + break; + case FTGMAC100_RXR_BADR: /* Ring buffer address */ + s->rx_ring = value; + s->rx_descriptor = s->rx_ring; + break; + + case FTGMAC100_RBSR: /* DMA buffer size */ + s->rbsr = value; + break; + + case FTGMAC100_NPTXR_BADR: /* Transmit buffer address */ + s->tx_ring = value; + s->tx_descriptor = s->tx_ring; + break; + + case FTGMAC100_NPTXPD: /* Trigger transmit */ + if ((s->maccr & (FTGMAC100_MACCR_TXDMA_EN | FTGMAC100_MACCR_TXMAC_EN)) + == (FTGMAC100_MACCR_TXDMA_EN | FTGMAC100_MACCR_TXMAC_EN)) { + /* TODO: high priority tx ring */ + 
ftgmac100_do_tx(s, s->tx_ring, s->tx_descriptor); + } + if (ftgmac100_can_receive(qemu_get_queue(s->nic))) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + + case FTGMAC100_RXPD: /* Receive Poll Demand Register */ + if (ftgmac100_can_receive(qemu_get_queue(s->nic))) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + + case FTGMAC100_APTC: /* Automatic polling */ + s->aptcr = value; + + if (FTGMAC100_APTC_RXPOLL_CNT(s->aptcr)) { + ftgmac100_rxpoll(s); + } + + if (FTGMAC100_APTC_TXPOLL_CNT(s->aptcr)) { + qemu_log_mask(LOG_UNIMP, "%s: no transmit polling\n", __func__); + } + break; + + case FTGMAC100_MACCR: /* MAC Device control */ + s->maccr = value; + if (value & FTGMAC100_MACCR_SW_RST) { + ftgmac100_reset(DEVICE(s)); + } + + if (ftgmac100_can_receive(qemu_get_queue(s->nic))) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } + break; + + case FTGMAC100_PHYCR: /* PHY Device control */ + reg = FTGMAC100_PHYCR_REG(value); + s->phycr = value; + if (value & FTGMAC100_PHYCR_MIIWR) { + do_phy_write(s, reg, s->phydata & 0xffff); + s->phycr &= ~FTGMAC100_PHYCR_MIIWR; + } else { + s->phydata = do_phy_read(s, reg) << 16; + s->phycr &= ~FTGMAC100_PHYCR_MIIRD; + } + break; + case FTGMAC100_PHYDATA: + s->phydata = value & 0xffff; + break; + case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */ + s->dblac = value; + break; + case FTGMAC100_REVR: /* Feature Register */ + /* TODO: Only Old MDIO interface is supported */ + s->revr = value & ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + break; + case FTGMAC100_FEAR1: /* Feature Register 1 */ + s->fear1 = value; + break; + case FTGMAC100_TPAFCR: /* Transmit Priority Arbitration and FIFO Control */ + s->tpafcr = value; + break; + case FTGMAC100_FCR: /* Flow Control */ + s->fcr = value; + break; + + case FTGMAC100_HPTXPD: /* High Priority Transmit Poll Demand */ + case FTGMAC100_HPTXR_BADR: /* High Priority Transmit Ring Base Address */ + case FTGMAC100_MACSR: /* MAC Status Register 
(MACSR) */ + qemu_log_mask(LOG_UNIMP, "%s: write to unimplemented register 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%" + HWADDR_PRIx "\n", __func__, addr); + break; + } + + ftgmac100_update_irq(s); +} + +static int ftgmac100_filter(FTGMAC100State *s, const uint8_t *buf, size_t len) +{ + unsigned mcast_idx; + + if (s->maccr & FTGMAC100_MACCR_RX_ALL) { + return 1; + } + + switch (get_eth_packet_type(PKT_GET_ETH_HDR(buf))) { + case ETH_PKT_BCAST: + if (!(s->maccr & FTGMAC100_MACCR_RX_BROADPKT)) { + return 0; + } + break; + case ETH_PKT_MCAST: + if (!(s->maccr & FTGMAC100_MACCR_RX_MULTIPKT)) { + if (!(s->maccr & FTGMAC100_MACCR_HT_MULTI_EN)) { + return 0; + } + + /* TODO: this does not seem to work for ftgmac100 */ + mcast_idx = compute_mcast_idx(buf); + if (!(s->math[mcast_idx / 32] & (1 << (mcast_idx % 32)))) { + return 0; + } + } + break; + case ETH_PKT_UCAST: + if (memcmp(s->conf.macaddr.a, buf, 6)) { + return 0; + } + break; + } + + return 1; +} + +static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf, + size_t len) +{ + FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc)); + FTGMAC100Desc bd; + uint32_t flags = 0; + uint32_t addr; + uint32_t crc; + uint32_t buf_addr; + uint8_t *crc_ptr; + uint32_t buf_len; + size_t size = len; + uint32_t first = FTGMAC100_RXDES0_FRS; + int max_frame_size = ftgmac100_max_frame_size(s); + + if ((s->maccr & (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) + != (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) { + return -1; + } + + /* TODO : Pad to minimum Ethernet frame length */ + /* handle small packets. 
*/ + if (size < 10) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped frame of %zd bytes\n", + __func__, size); + return size; + } + + if (size < 64 && !(s->maccr & FTGMAC100_MACCR_RX_RUNT)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped runt frame of %zd bytes\n", + __func__, size); + return size; + } + + if (!ftgmac100_filter(s, buf, size)) { + return size; + } + + /* 4 bytes for the CRC. */ + size += 4; + crc = cpu_to_be32(crc32(~0, buf, size)); + crc_ptr = (uint8_t *) &crc; + + /* Huge frames are truncated. */ + if (size > max_frame_size) { + size = max_frame_size; + qemu_log_mask(LOG_GUEST_ERROR, "%s: frame too big : %zd bytes\n", + __func__, size); + flags |= FTGMAC100_RXDES0_FTL; + } + + switch (get_eth_packet_type(PKT_GET_ETH_HDR(buf))) { + case ETH_PKT_BCAST: + flags |= FTGMAC100_RXDES0_BROADCAST; + break; + case ETH_PKT_MCAST: + flags |= FTGMAC100_RXDES0_MULTICAST; + break; + case ETH_PKT_UCAST: + break; + } + + addr = s->rx_descriptor; + while (size > 0) { + if (!ftgmac100_can_receive(nc)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: Unexpected packet\n", __func__); + return -1; + } + + if (ftgmac100_read_bd(&bd, addr) || + (bd.des0 & FTGMAC100_RXDES0_RXPKT_RDY)) { + /* No descriptors available. Bail out. */ + qemu_log_mask(LOG_GUEST_ERROR, "%s: Lost end of frame\n", + __func__); + s->isr |= FTGMAC100_INT_NO_RXBUF; + break; + } + buf_len = (size <= s->rbsr) ? size : s->rbsr; + bd.des0 |= buf_len & 0x3fff; + size -= buf_len; + + /* The last 4 bytes are the CRC. */ + if (size < 4) { + buf_len += size - 4; + } + buf_addr = bd.des3; + dma_memory_write(&address_space_memory, buf_addr, buf, buf_len); + buf += buf_len; + if (size < 4) { + dma_memory_write(&address_space_memory, buf_addr + buf_len, + crc_ptr, 4 - size); + crc_ptr += 4 - size; + } + + bd.des0 |= first | FTGMAC100_RXDES0_RXPKT_RDY; + first = 0; + if (size == 0) { + /* Last buffer in frame. 
*/ + bd.des0 |= flags | FTGMAC100_RXDES0_LRS; + s->isr |= FTGMAC100_INT_RPKT_BUF; + } else { + s->isr |= FTGMAC100_INT_RPKT_FIFO; + } + ftgmac100_write_bd(&bd, addr); + if (bd.des0 & s->rxdes0_edorr) { + addr = s->rx_ring; + } else { + addr += sizeof(FTGMAC100Desc); + } + } + s->rx_descriptor = addr; + + ftgmac100_update_irq(s); + return len; +} + +static const MemoryRegionOps ftgmac100_ops = { + .read = ftgmac100_read, + .write = ftgmac100_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ftgmac100_cleanup(NetClientState *nc) +{ + FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc)); + + s->nic = NULL; +} + +static NetClientInfo net_ftgmac100_info = { + .type = NET_CLIENT_DRIVER_NIC, + .size = sizeof(NICState), + .can_receive = ftgmac100_can_receive, + .receive = ftgmac100_receive, + .cleanup = ftgmac100_cleanup, + .link_status_changed = ftgmac100_set_link, +}; + +static void ftgmac100_realize(DeviceState *dev, Error **errp) +{ + FTGMAC100State *s = FTGMAC100(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + if (s->aspeed) { + s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR_ASPEED; + s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR_ASPEED; + } else { + s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR; + s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR; + } + + memory_region_init_io(&s->iomem, OBJECT(dev), &ftgmac100_ops, s, + TYPE_FTGMAC100, 0x2000); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->conf.peers.ncs[0] = nd_table[0].netdev; + + s->nic = qemu_new_nic(&net_ftgmac100_info, &s->conf, + object_get_typename(OBJECT(dev)), DEVICE(dev)->id, + s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); + + s->frame = g_malloc(FTGMAC100_MAX_FRAME_SIZE); +} + +static const VMStateDescription vmstate_ftgmac100 = { + .name = TYPE_FTGMAC100, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + 
VMSTATE_UINT32(irq_state, FTGMAC100State), + VMSTATE_UINT32(isr, FTGMAC100State), + VMSTATE_UINT32(ier, FTGMAC100State), + VMSTATE_UINT32(rx_enabled, FTGMAC100State), + VMSTATE_UINT32(rx_ring, FTGMAC100State), + VMSTATE_UINT32(rbsr, FTGMAC100State), + VMSTATE_UINT32(tx_ring, FTGMAC100State), + VMSTATE_UINT32(rx_descriptor, FTGMAC100State), + VMSTATE_UINT32(tx_descriptor, FTGMAC100State), + VMSTATE_UINT32_ARRAY(math, FTGMAC100State, 2), + VMSTATE_UINT32(itc, FTGMAC100State), + VMSTATE_UINT32(aptcr, FTGMAC100State), + VMSTATE_UINT32(dblac, FTGMAC100State), + VMSTATE_UINT32(revr, FTGMAC100State), + VMSTATE_UINT32(fear1, FTGMAC100State), + VMSTATE_UINT32(tpafcr, FTGMAC100State), + VMSTATE_UINT32(maccr, FTGMAC100State), + VMSTATE_UINT32(phycr, FTGMAC100State), + VMSTATE_UINT32(phydata, FTGMAC100State), + VMSTATE_UINT32(fcr, FTGMAC100State), + VMSTATE_UINT32(phy_status, FTGMAC100State), + VMSTATE_UINT32(phy_control, FTGMAC100State), + VMSTATE_UINT32(phy_advertise, FTGMAC100State), + VMSTATE_UINT32(phy_int, FTGMAC100State), + VMSTATE_UINT32(phy_int_mask, FTGMAC100State), + VMSTATE_UINT32(txdes0_edotr, FTGMAC100State), + VMSTATE_UINT32(rxdes0_edorr, FTGMAC100State), + VMSTATE_END_OF_LIST() + } +}; + +static Property ftgmac100_properties[] = { + DEFINE_PROP_BOOL("aspeed", FTGMAC100State, aspeed, false), + DEFINE_NIC_PROPERTIES(FTGMAC100State, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ftgmac100_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_ftgmac100; + dc->reset = ftgmac100_reset; + dc->props = ftgmac100_properties; + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + dc->realize = ftgmac100_realize; + dc->desc = "Faraday FTGMAC100 Gigabit Ethernet emulation"; +} + +static const TypeInfo ftgmac100_info = { + .name = TYPE_FTGMAC100, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(FTGMAC100State), + .class_init = ftgmac100_class_init, +}; + +static void ftgmac100_register_types(void) +{ + 
type_register_static(&ftgmac100_info); +} + +type_init(ftgmac100_register_types) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index c32168077a..7d091c9259 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -510,6 +510,10 @@ static int peer_attach(VirtIONet *n, int index) return 0; } + if (n->max_queues == 1) { + return 0; + } + return tap_enable(nc->peer); } diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c index 467cbb9cb8..27fde46126 100644 --- a/hw/pci-host/versatile.c +++ b/hw/pci-host/versatile.c @@ -380,20 +380,8 @@ static void pci_vpb_reset(DeviceState *d) static void pci_vpb_init(Object *obj) { - PCIHostState *h = PCI_HOST_BRIDGE(obj); PCIVPBState *s = PCI_VPB(obj); - memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); - memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); - - pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci", - &s->pci_mem_space, &s->pci_io_space, - PCI_DEVFN(11, 0), TYPE_PCI_BUS); - h->bus = &s->pci_bus; - - object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); - qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus)); - /* Window sizes for VersatilePB; realview_pci's init will override */ s->mem_win_size[0] = 0x0c000000; s->mem_win_size[1] = 0x10000000; @@ -403,10 +391,22 @@ static void pci_vpb_init(Object *obj) static void pci_vpb_realize(DeviceState *dev, Error **errp) { PCIVPBState *s = PCI_VPB(dev); + PCIHostState *h = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); pci_map_irq_fn mapfn; int i; + memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32); + memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32); + + pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci", + &s->pci_mem_space, &s->pci_io_space, + PCI_DEVFN(11, 0), TYPE_PCI_BUS); + h->bus = &s->pci_bus; + + object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST); + 
qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus)); + for (i = 0; i < 4; i++) { sysbus_init_irq(sbd, &s->irq[i]); } @@ -503,8 +503,6 @@ static void pci_vpb_class_init(ObjectClass *klass, void *data) dc->reset = pci_vpb_reset; dc->vmsd = &pci_vpb_vmstate; dc->props = pci_vpb_properties; - /* Reason: object_unref() hangs */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo pci_vpb_info = { @@ -526,19 +524,10 @@ static void pci_realview_init(Object *obj) s->mem_win_size[2] = 0x08000000; } -static void pci_realview_class_init(ObjectClass *class, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(class); - - /* Reason: object_unref() hangs */ - dc->cannot_destroy_with_object_finalize_yet = true; -} - static const TypeInfo pci_realview_info = { .name = "realview_pci", .parent = TYPE_VERSATILE_PCI, .instance_init = pci_realview_init, - .class_init = pci_realview_class_init, }; static void versatile_pci_register_types(void) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e6b08e1988..259483b1c0 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -869,6 +869,10 @@ static void do_pci_unregister_device(PCIDevice *pci_dev) pci_dev->bus->devices[pci_dev->devfn] = NULL; pci_config_free(pci_dev); + if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) { + memory_region_del_subregion(&pci_dev->bus_master_container_region, + &pci_dev->bus_master_enable_region); + } address_space_destroy(&pci_dev->bus_master_as); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 6ee566d658..35db949dbc 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1524,16 +1524,16 @@ static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr, /* Consume invalid HPTEs */ while ((index < htabslots) && !HPTE_VALID(HPTE(spapr->htab, index))) { - index++; CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; } /* Consume valid HPTEs */ chunkstart = index; while ((index < htabslots) && (index - chunkstart < USHRT_MAX) && HPTE_VALID(HPTE(spapr->htab, index))) { - 
index++; CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; } if (index > chunkstart) { @@ -2790,6 +2790,12 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } + if (cc->nr_threads != smp_threads) { + error_setg(errp, "invalid nr-threads %d, must be %d", + cc->nr_threads, smp_threads); + return; + } + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); if (!core_slot) { error_setg(&local_err, "core id %d out of range", cc->core_id); @@ -3096,6 +3102,11 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) xic->ics_resend = spapr_ics_resend; xic->icp_get = spapr_icp_get; ispc->print_info = spapr_pic_print_info; + /* Force NUMA node memory size to be a multiple of + * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity + * in which LMBs are represented and hot-added + */ + mc->numa_mem_align_shift = 28; } static const TypeInfo spapr_machine_info = { @@ -3180,6 +3191,7 @@ static void spapr_machine_2_8_class_options(MachineClass *mc) { spapr_machine_2_9_class_options(mc); SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_8); + mc->numa_mem_align_shift = 23; } DEFINE_SPAPR_MACHINE(2_8, "2.8", false); diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 150f6bf2c7..a1cdc875b1 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -135,6 +135,17 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, if (!drc->dev) { return RTAS_OUT_NO_SUCH_INDICATOR; } + if (drc->awaiting_release && drc->awaiting_allocation) { + /* kernel is acknowledging a previous hotplug event + * while we are already removing it. + * it's safe to ignore awaiting_allocation here since we know the + * situation is predicated on the guest either already having done + * so (boot-time hotplug), or never being able to acquire in the + * first place (hotplug followed by immediate unplug). 
+ */ + drc->awaiting_allocation_skippable = true; + return RTAS_OUT_NO_SUCH_INDICATOR; + } } if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) { @@ -436,9 +447,11 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, } if (drc->awaiting_allocation) { - drc->awaiting_release = true; - trace_spapr_drc_awaiting_allocation(get_index(drc)); - return; + if (!drc->awaiting_allocation_skippable) { + drc->awaiting_release = true; + trace_spapr_drc_awaiting_allocation(get_index(drc)); + return; + } } drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE; @@ -448,6 +461,7 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, } drc->awaiting_release = false; + drc->awaiting_allocation_skippable = false; g_free(drc->fdt); drc->fdt = NULL; drc->fdt_start_offset = 0; diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c index 28ea20440e..fb8d640a7e 100644 --- a/hw/s390x/ccw-device.c +++ b/hw/s390x/ccw-device.c @@ -11,11 +11,51 @@ #include "qemu/osdep.h" #include "ccw-device.h" +static void ccw_device_refill_ids(CcwDevice *dev) +{ + SubchDev *sch = dev->sch; + + assert(sch); + + dev->dev_id.cssid = sch->cssid; + dev->dev_id.ssid = sch->ssid; + dev->dev_id.devid = sch->devno; + dev->dev_id.valid = true; + + dev->subch_id.cssid = sch->cssid; + dev->subch_id.ssid = sch->ssid; + dev->subch_id.devid = sch->schid; + dev->subch_id.valid = true; +} + +static void ccw_device_realize(CcwDevice *dev, Error **errp) +{ + ccw_device_refill_ids(dev); +} + +static Property ccw_device_properties[] = { + DEFINE_PROP_CSS_DEV_ID("devno", CcwDevice, devno), + DEFINE_PROP_CSS_DEV_ID_RO("dev_id", CcwDevice, dev_id), + DEFINE_PROP_CSS_DEV_ID_RO("subch_id", CcwDevice, subch_id), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ccw_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + CCWDeviceClass *k = CCW_DEVICE_CLASS(klass); + + k->realize = ccw_device_realize; + k->refill_ids = ccw_device_refill_ids; + dc->props = ccw_device_properties; +} + 
static const TypeInfo ccw_device_info = { .name = TYPE_CCW_DEVICE, .parent = TYPE_DEVICE, .instance_size = sizeof(CcwDevice), .class_size = sizeof(CCWDeviceClass), + .class_init = ccw_device_class_init, .abstract = true, }; diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h index 59ba01b6c5..89c8e5dff7 100644 --- a/hw/s390x/ccw-device.h +++ b/hw/s390x/ccw-device.h @@ -19,12 +19,19 @@ typedef struct CcwDevice { DeviceState parent_obj; SubchDev *sch; /* <cssid>.<ssid>.<device number> */ - CssDevId bus_id; + /* The user-set busid of the virtual ccw device. */ + CssDevId devno; + /* The actual busid of the virtual ccw device. */ + CssDevId dev_id; + /* The actual busid of the virtual subchannel. */ + CssDevId subch_id; } CcwDevice; typedef struct CCWDeviceClass { DeviceClass parent_class; void (*unplug)(HotplugHandler *, DeviceState *, Error **); + void (*realize)(CcwDevice *, Error **); + void (*refill_ids)(CcwDevice *); } CCWDeviceClass; static inline CcwDevice *to_ccw_dev_fast(DeviceState *d) diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c index 9a7f7ee60c..b54ac01d37 100644 --- a/hw/s390x/css-bridge.c +++ b/hw/s390x/css-bridge.c @@ -107,6 +107,9 @@ VirtualCssBus *virtual_css_bus_init(void) /* Enable hotplugging */ qbus_set_hotplug_handler(bus, dev, &error_abort); + css_register_io_adapters(CSS_IO_ADAPTER_VIRTIO, true, false, + &error_abort); + return cbus; } diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 37caa98195..c03bb20bc9 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -47,7 +47,6 @@ typedef struct IoAdapter { uint32_t id; uint8_t type; uint8_t isc; - QTAILQ_ENTRY(IoAdapter) sibling; } IoAdapter; typedef struct ChannelSubSys { @@ -61,7 +60,7 @@ typedef struct ChannelSubSys { uint64_t chnmon_area; CssImage *css[MAX_CSSID + 1]; uint8_t default_cssid; - QTAILQ_HEAD(, IoAdapter) io_adapters; + IoAdapter *io_adapters[CSS_IO_ADAPTER_TYPE_NUMS][MAX_ISC + 1]; QTAILQ_HEAD(, IndAddr) indicator_addresses; } ChannelSubSys; @@ -72,7 +71,6 @@ 
static ChannelSubSys channel_subsys = { .do_crw_mchk = true, .crws_lost = false, .chnmon_active = false, - .io_adapters = QTAILQ_HEAD_INITIALIZER(channel_subsys.io_adapters), .indicator_addresses = QTAILQ_HEAD_INITIALIZER(channel_subsys.indicator_addresses), }; @@ -155,44 +153,67 @@ int css_create_css_image(uint8_t cssid, bool default_image) return 0; } -int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap, - bool maskable, uint32_t *id) +uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc) { + if (type >= CSS_IO_ADAPTER_TYPE_NUMS || isc > MAX_ISC || + !channel_subsys.io_adapters[type][isc]) { + return -1; + } + + return channel_subsys.io_adapters[type][isc]->id; +} + +/** + * css_register_io_adapters: Register I/O adapters per ISC during init + * + * @swap: an indication if byte swap is needed. + * @maskable: an indication if the adapter is subject to the mask operation. + * @errp: location to store error information. + */ +void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable, + Error **errp) +{ + uint32_t id; + int ret, isc; IoAdapter *adapter; - bool found = false; - int ret; S390FLICState *fs = s390_get_flic(); S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs); - *id = 0; - QTAILQ_FOREACH(adapter, &channel_subsys.io_adapters, sibling) { - if ((adapter->type == type) && (adapter->isc == isc)) { - *id = adapter->id; - found = true; - ret = 0; + /* + * Disallow multiple registrations for the same device type. + * Report an error if registering for an already registered type. 
+ */ + if (channel_subsys.io_adapters[type][0]) { + error_setg(errp, "Adapters for type %d already registered", type); + return; /* errp is set: do not re-register or set it again */ + } + for (isc = 0; isc <= MAX_ISC; isc++) { + id = (type << 3) | isc; + ret = fsc->register_io_adapter(fs, id, isc, swap, maskable); + if (ret == 0) { + adapter = g_new0(IoAdapter, 1); + adapter->id = id; + adapter->isc = isc; + adapter->type = type; + channel_subsys.io_adapters[type][isc] = adapter; + } else { + error_setg_errno(errp, -ret, "Unexpected error %d when " + "registering adapter %d", ret, id); break; } - if (adapter->id >= *id) { - *id = adapter->id + 1; - } - } - if (found) { - goto out; } - adapter = g_new0(IoAdapter, 1); - ret = fsc->register_io_adapter(fs, *id, isc, swap, maskable); - if (ret == 0) { - adapter->id = *id; - adapter->isc = isc; - adapter->type = type; - QTAILQ_INSERT_TAIL(&channel_subsys.io_adapters, adapter, sibling); - } else { - g_free(adapter); - fprintf(stderr, "Unexpected error %d when registering adapter %d\n", - ret, *id); + + /* + * No need to free registered adapters in kvm: kvm will clean up + * when the machine goes away. 
+ */ + if (ret) { + for (isc--; isc >= 0; isc--) { + g_free(channel_subsys.io_adapters[type][isc]); + channel_subsys.io_adapters[type][isc] = NULL; + } } -out: - return ret; + } static void css_clear_io_interrupt(uint16_t subchannel_id, @@ -1894,6 +1915,13 @@ PropertyInfo css_devid_propinfo = { .set = set_css_devid, }; +PropertyInfo css_devid_ro_propinfo = { + .name = "str", + .description = "Read-only identifier of an I/O device in the channel " + "subsystem, example: fe.1.23ab", + .get = get_css_devid, +}; + SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp) { uint16_t schid = 0; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 69b0291e8a..a8a1bab50a 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -23,15 +23,17 @@ #include "hw/pci/msi.h" #include "qemu/error-report.h" -/* #define DEBUG_S390PCI_BUS */ -#ifdef DEBUG_S390PCI_BUS -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_S390PCI_BUS +#define DEBUG_S390PCI_BUS 0 #endif +#define DPRINTF(fmt, ...) 
\ + do { \ + if (DEBUG_S390PCI_BUS) { \ + fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) + S390pciState *s390_get_phb(void) { static S390pciState *phb; @@ -579,6 +581,9 @@ static int s390_pcihost_init(SysBusDevice *dev) s->bus_no = 0; QTAILQ_INIT(&s->pending_sei); QTAILQ_INIT(&s->zpci_devs); + + css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, &error_abort); + return 0; } diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h index dcbf4820c9..cf142a3e68 100644 --- a/hw/s390x/s390-pci-bus.h +++ b/hw/s390x/s390-pci-bus.h @@ -30,7 +30,6 @@ #define FH_MASK_INDEX 0x0000ffff #define FH_SHM_VFIO 0x00010000 #define FH_SHM_EMUL 0x00020000 -#define S390_PCIPT_ADAPTER 2 #define ZPCI_MAX_FID 0xffffffff #define ZPCI_MAX_UID 0xffff #define UID_UNDEFINED 0 diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index d2a8c0a083..314a9cbad4 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -20,15 +20,17 @@ #include "qemu/error-report.h" #include "sysemu/hw_accel.h" -/* #define DEBUG_S390PCI_INST */ -#ifdef DEBUG_S390PCI_INST -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_S390PCI_INST +#define DEBUG_S390PCI_INST 0 #endif +#define DPRINTF(fmt, ...) 
\ + do { \ + if (DEBUG_S390PCI_INST) { \ + fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); \ + } \ + } while (0) + static void s390_set_status_code(CPUS390XState *env, uint8_t r, uint64_t status_code) { @@ -731,12 +733,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) { int ret, len; + uint8_t isc = FIB_DATA_ISC(ldl_p(&fib.data)); - ret = css_register_io_adapter(S390_PCIPT_ADAPTER, - FIB_DATA_ISC(ldl_p(&fib.data)), true, false, - &pbdev->routes.adapter.adapter_id); - assert(ret == 0); - + pbdev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_PCI, isc); pbdev->summary_ind = get_indicator(ldq_p(&fib.aisb), sizeof(uint64_t)); len = BITS_TO_LONGS(FIB_DATA_NOI(ldl_p(&fib.data))) * sizeof(unsigned long); pbdev->indicator = get_indicator(ldq_p(&fib.aibv), len); @@ -755,7 +755,7 @@ static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib) pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data)); pbdev->routes.adapter.ind_addr = ldq_p(&fib.aibv); pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(ldl_p(&fib.data)); - pbdev->isc = FIB_DATA_ISC(ldl_p(&fib.data)); + pbdev->isc = isc; pbdev->noi = FIB_DATA_NOI(ldl_p(&fib.data)); pbdev->sum = FIB_DATA_SUM(ldl_p(&fib.data)); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 40914fde6f..04bd0ebe40 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -113,12 +113,13 @@ static void ccw_init(MachineState *machine) s390_sclp_init(); s390_memory_init(machine->ram_size); + s390_flic_init(); + /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, machine->initrd_filename, "s390-ccw.img", "s390-netboot.img", true); - s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE, @@ 
-336,6 +337,9 @@ static const TypeInfo ccw_machine_info = { } \ type_init(ccw_machine_register_##suffix) +#define CCW_COMPAT_2_9 \ + HW_COMPAT_2_9 + #define CCW_COMPAT_2_8 \ HW_COMPAT_2_8 \ {\ @@ -402,14 +406,26 @@ static const TypeInfo ccw_machine_info = { .value = "0",\ }, +static void ccw_machine_2_10_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_2_10_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(2_10, "2.10", true); + static void ccw_machine_2_9_instance_options(MachineState *machine) { + ccw_machine_2_10_instance_options(machine); } static void ccw_machine_2_9_class_options(MachineClass *mc) { + ccw_machine_2_10_class_options(mc); + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_9); } -DEFINE_CCW_MACHINE(2_9, "2.9", true); +DEFINE_CCW_MACHINE(2_9, "2.9", false); static void ccw_machine_2_8_instance_options(MachineState *machine) { diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 00b3bde4e9..e7167e3d05 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -616,10 +616,9 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) dev->routes.adapter.ind_offset = ind_bit; dev->routes.adapter.summary_offset = 7; cpu_physical_memory_unmap(thinint, hw_len, 0, hw_len); - ret = css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO, - dev->thinint_isc, true, false, - &dev->routes.adapter.adapter_id); - assert(ret == 0); + dev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_VIRTIO, + dev->thinint_isc); sch->thinint_active = ((dev->indicators != NULL) && (dev->summary_indicator != NULL)); sch->curr_status.scsw.count = ccw.count - len; @@ -680,7 +679,8 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) { VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev); CcwDevice *ccw_dev = CCW_DEVICE(dev); - SubchDev *sch = css_create_virtual_sch(ccw_dev->bus_id, errp); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); + SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp); Error 
*err = NULL; if (!sch) { @@ -689,8 +689,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) if (!virtio_ccw_rev_max(dev) && dev->force_revision_1) { error_setg(&err, "Invalid value of property max_rev " "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); - error_propagate(errp, err); - return; + goto out_err; } sch->driver_data = dev; @@ -705,7 +704,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) trace_virtio_ccw_new_device( sch->cssid, sch->ssid, sch->schid, sch->devno, - ccw_dev->bus_id.valid ? "user-configured" : "auto-configured"); + ccw_dev->devno.valid ? "user-configured" : "auto-configured"); if (!kvm_eventfds_enabled()) { dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; @@ -713,13 +712,23 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) if (k->realize) { k->realize(dev, &err); + if (err) { + goto out_err; + } } + + ck->realize(ccw_dev, &err); if (err) { - error_propagate(errp, err); - css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); - ccw_dev->sch = NULL; - g_free(sch); + goto out_err; } + + return; + +out_err: + error_propagate(errp, err); + css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL); + ccw_dev->sch = NULL; + g_free(sch); } static int virtio_ccw_exit(VirtioCcwDevice *dev) @@ -1261,12 +1270,17 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); CcwDevice *ccw_dev = CCW_DEVICE(d); + CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev); SubchDev *s = ccw_dev->sch; VirtIODevice *vdev = virtio_ccw_get_vdev(s); int len; s->driver_data = dev; subch_device_load(s, f); + /* Re-fill subch_id after loading the subchannel states.*/ + if (ck->refill_ids) { + ck->refill_ids(ccw_dev); + } len = qemu_get_be32(f); if (len != 0) { dev->indicators = get_indicator(qemu_get_be64(f), len); @@ -1293,9 +1307,9 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f) dev->thinint_isc = 
qemu_get_byte(f); dev->revision = qemu_get_be32(f); if (s->thinint_active) { - return css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO, - dev->thinint_isc, true, false, - &dev->routes.adapter.adapter_id); + dev->routes.adapter.adapter_id = css_get_adapter_id( + CSS_IO_ADAPTER_VIRTIO, + dev->thinint_isc); } return 0; @@ -1354,7 +1368,6 @@ static void virtio_ccw_device_unplugged(DeviceState *d) /**************** Virtio-ccw Bus Device Descriptions *******************/ static Property virtio_ccw_net_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1383,7 +1396,6 @@ static const TypeInfo virtio_ccw_net = { }; static Property virtio_ccw_blk_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1412,7 +1424,6 @@ static const TypeInfo virtio_ccw_blk = { }; static Property virtio_ccw_serial_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1441,7 +1452,6 @@ static const TypeInfo virtio_ccw_serial = { }; static Property virtio_ccw_balloon_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1470,7 +1480,6 @@ static const TypeInfo virtio_ccw_balloon = { }; static Property virtio_ccw_scsi_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", 
VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1500,7 +1509,6 @@ static const TypeInfo virtio_ccw_scsi = { #ifdef CONFIG_VHOST_SCSI static Property vhost_ccw_scsi_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, VIRTIO_CCW_MAX_REV), DEFINE_PROP_END_OF_LIST(), @@ -1538,7 +1546,6 @@ static void virtio_ccw_rng_instance_init(Object *obj) } static Property virtio_ccw_rng_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1567,7 +1574,6 @@ static const TypeInfo virtio_ccw_rng = { }; static Property virtio_ccw_crypto_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1694,7 +1700,6 @@ static const TypeInfo virtio_ccw_bus_info = { #ifdef CONFIG_VIRTFS static Property virtio_ccw_9p_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, @@ -1743,7 +1748,6 @@ static const TypeInfo virtio_ccw_9p_info = { #ifdef CONFIG_VHOST_VSOCK static Property vhost_vsock_ccw_properties[] = { - DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, VIRTIO_CCW_MAX_REV), DEFINE_PROP_END_OF_LIST(), @@ -1757,9 +1761,7 @@ static void vhost_vsock_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp) qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); 
object_property_set_bool(OBJECT(vdev), true, "realized", &err); - if (err) { - error_propagate(errp, err); - } + error_propagate(errp, err); } static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index e3d59b7c83..84b8caf901 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -291,7 +291,7 @@ static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl) if (cmd->iov_size > iov_size) { trace_megasas_iovec_overflow(cmd->index, iov_size, cmd->iov_size); } else if (cmd->iov_size < iov_size) { - trace_megasas_iovec_underflow(cmd->iov_size, iov_size, cmd->iov_size); + trace_megasas_iovec_underflow(cmd->index, iov_size, cmd->iov_size); } cmd->iov_offset = 0; return 0; @@ -1924,8 +1924,8 @@ static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd) abort_ctx &= (uint64_t)0xFFFFFFFF; } if (abort_cmd->context != abort_ctx) { - trace_megasas_abort_invalid_context(cmd->index, abort_cmd->index, - abort_cmd->context); + trace_megasas_abort_invalid_context(cmd->index, abort_cmd->context, + abort_cmd->index); s->event_count++; return MFI_STAT_ABORT_NOT_POSSIBLE; } diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 2933119e7d..a55ff87c22 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -237,9 +237,8 @@ static void scsi_read_complete(void * opaque, int ret) assert(max_transfer); stl_be_p(&r->buf[8], max_transfer); /* Also take care of the opt xfer len. 
*/ - if (ldl_be_p(&r->buf[12]) > max_transfer) { - stl_be_p(&r->buf[12], max_transfer); - } + stl_be_p(&r->buf[12], + MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12]))); } scsi_req_data(&r->req, len); scsi_req_unref(&r->req); diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index c491ece1f2..f53bc179da 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -233,9 +233,11 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) } } - virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output, + virtio_scsi_common_realize(dev, vhost_dummy_handle_output, - vhost_dummy_handle_output); + vhost_dummy_handle_output, + vhost_dummy_handle_output, + &err); if (err != NULL) { error_propagate(errp, err); goto close_fd; diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 74c95e0e60..944ea4eb53 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -52,28 +52,40 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) { - VirtIOSCSI *s = (VirtIOSCSI *)vdev; + bool progress; + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + virtio_scsi_acquire(s); assert(s->ctx && s->dataplane_started); - return virtio_scsi_handle_cmd_vq(s, vq); + progress = virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); + return progress; } static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) { + bool progress; VirtIOSCSI *s = VIRTIO_SCSI(vdev); + virtio_scsi_acquire(s); assert(s->ctx && s->dataplane_started); - return virtio_scsi_handle_ctrl_vq(s, vq); + progress = virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); + return progress; } static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev, VirtQueue *vq) { + bool progress; VirtIOSCSI *s = VIRTIO_SCSI(vdev); + virtio_scsi_acquire(s); assert(s->ctx && s->dataplane_started); - return virtio_scsi_handle_event_vq(s, vq); + progress 
= virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); + return progress; } static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 1dbc4bced9..46a3e3f280 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -422,31 +422,15 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) } } -static inline void virtio_scsi_acquire(VirtIOSCSI *s) -{ - if (s->ctx) { - aio_context_acquire(s->ctx); - } -} - -static inline void virtio_scsi_release(VirtIOSCSI *s) -{ - if (s->ctx) { - aio_context_release(s->ctx); - } -} - bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req; bool progress = false; - virtio_scsi_acquire(s); while ((req = virtio_scsi_pop_req(s, vq))) { progress = true; virtio_scsi_handle_ctrl_req(s, req); } - virtio_scsi_release(s); return progress; } @@ -460,7 +444,9 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) return; } } + virtio_scsi_acquire(s); virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); } static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) @@ -604,7 +590,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - virtio_scsi_acquire(s); do { virtio_queue_set_notification(vq, 0); @@ -632,7 +617,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { virtio_scsi_handle_cmd_req_submit(s, req); } - virtio_scsi_release(s); return progress; } @@ -647,7 +631,9 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) return; } } + virtio_scsi_acquire(s); virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); } static void virtio_scsi_get_config(VirtIODevice *vdev, @@ -723,12 +709,10 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, return; } - virtio_scsi_acquire(s); - req = virtio_scsi_pop_req(s, 
vs->event_vq); if (!req) { s->events_dropped = true; - goto out; + return; } if (s->events_dropped) { @@ -738,7 +722,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) { virtio_scsi_bad_req(req); - goto out; + return; } evt = &req->resp.event; @@ -758,19 +742,14 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, evt->lun[3] = dev->lun & 0xFF; } virtio_scsi_complete_req(req); -out: - virtio_scsi_release(s); } bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) { - virtio_scsi_acquire(s); if (s->events_dropped) { virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); - virtio_scsi_release(s); return true; } - virtio_scsi_release(s); return false; } @@ -784,7 +763,9 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) return; } } + virtio_scsi_acquire(s); virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); } static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) @@ -794,8 +775,10 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) && dev->type != TYPE_ROM) { + virtio_scsi_acquire(s); virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE, sense.asc | (sense.ascq << 8)); + virtio_scsi_release(s); } } @@ -817,9 +800,11 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + virtio_scsi_acquire(s); virtio_scsi_push_event(s, sd, VIRTIO_SCSI_T_TRANSPORT_RESET, VIRTIO_SCSI_EVT_RESET_RESCAN); + virtio_scsi_release(s); } } @@ -831,9 +816,11 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, SCSIDevice *sd = SCSI_DEVICE(dev); if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { + virtio_scsi_acquire(s); virtio_scsi_push_event(s, sd, VIRTIO_SCSI_T_TRANSPORT_RESET, VIRTIO_SCSI_EVT_RESET_REMOVED); + 
virtio_scsi_release(s); } qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); @@ -854,10 +841,11 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = { .load_request = virtio_scsi_load_request, }; -void virtio_scsi_common_realize(DeviceState *dev, Error **errp, +void virtio_scsi_common_realize(DeviceState *dev, VirtIOHandleOutput ctrl, VirtIOHandleOutput evt, - VirtIOHandleOutput cmd) + VirtIOHandleOutput cmd, + Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev); @@ -891,9 +879,11 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) VirtIOSCSI *s = VIRTIO_SCSI(dev); Error *err = NULL; - virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl, + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, virtio_scsi_handle_event, - virtio_scsi_handle_cmd); + virtio_scsi_handle_cmd, + &err); if (err != NULL) { error_propagate(errp, err); return; diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 6d06968f8b..8f520cec1c 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -277,8 +277,15 @@ static void r2d_init(MachineState *machine) sysbus_connect_irq(busdev, 2, irq[PCI_INTC]); sysbus_connect_irq(busdev, 3, irq[PCI_INTD]); - sm501_init(address_space_mem, 0x10000000, SM501_VRAM_SIZE, - irq[SM501], serial_hds[2]); + dev = qdev_create(NULL, "sysbus-sm501"); + busdev = SYS_BUS_DEVICE(dev); + qdev_prop_set_uint32(dev, "vram-size", SM501_VRAM_SIZE); + qdev_prop_set_uint32(dev, "base", 0x10000000); + qdev_prop_set_ptr(dev, "chr-state", serial_hds[2]); + qdev_init_nofail(dev); + sysbus_mmio_map(busdev, 0, 0x10000000); + sysbus_mmio_map(busdev, 1, 0x13e00000); + sysbus_connect_irq(busdev, 0, irq[SM501]); /* onboard CF (True IDE mode, Master only). 
*/ dinfo = drive_get(IF_IDE, 0, 0); diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 873cd7df9a..5f022cc08d 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -491,7 +491,6 @@ static void tcx_init(hwaddr addr, qemu_irq irq, int vram_size, int width, qdev_prop_set_uint16(dev, "width", width); qdev_prop_set_uint16(dev, "height", height); qdev_prop_set_uint16(dev, "depth", depth); - qdev_prop_set_uint64(dev, "prom_addr", addr); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); @@ -544,7 +543,6 @@ static void cg3_init(hwaddr addr, qemu_irq irq, int vram_size, int width, qdev_prop_set_uint16(dev, "width", width); qdev_prop_set_uint16(dev, "height", height); qdev_prop_set_uint16(dev, "depth", depth); - qdev_prop_set_uint64(dev, "prom-addr", addr); qdev_init_nofail(dev); s = SYS_BUS_DEVICE(dev); diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c index 0c189348ae..a2ec3920f8 100644 --- a/hw/timer/exynos4210_mct.c +++ b/hw/timer/exynos4210_mct.c @@ -53,6 +53,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/sysbus.h" #include "qemu/timer.h" #include "qemu/main-loop.h" @@ -1372,8 +1373,9 @@ break; case L0_TCNTO: case L1_TCNTO: case L0_ICNTO: case L1_ICNTO: case L0_FRCNTO: case L1_FRCNTO: - fprintf(stderr, "\n[exynos4210.mct: write to RO register " - TARGET_FMT_plx "]\n\n", offset); + qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.mct: write to RO register " TARGET_FMT_plx, + offset); break; case L0_INT_CSTAT: case L1_INT_CSTAT: diff --git a/hw/timer/exynos4210_pwm.c b/hw/timer/exynos4210_pwm.c index f5765075c7..87f63f057e 100644 --- a/hw/timer/exynos4210_pwm.c +++ b/hw/timer/exynos4210_pwm.c @@ -21,6 +21,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/sysbus.h" #include "qemu/timer.h" #include "qemu-common.h" @@ -252,9 +253,9 @@ static uint64_t exynos4210_pwm_read(void *opaque, hwaddr offset, break; default: - fprintf(stderr, - "[exynos4210.pwm: bad read offset " TARGET_FMT_plx "]\n", - offset); + 
qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.pwm: bad read offset " TARGET_FMT_plx, + offset); break; } return value; @@ -343,9 +344,9 @@ static void exynos4210_pwm_write(void *opaque, hwaddr offset, break; default: - fprintf(stderr, - "[exynos4210.pwm: bad write offset " TARGET_FMT_plx "]\n", - offset); + qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.pwm: bad write offset " TARGET_FMT_plx, + offset); break; } diff --git a/hw/timer/exynos4210_rtc.c b/hw/timer/exynos4210_rtc.c index 1a648c5d9e..4607833e3e 100644 --- a/hw/timer/exynos4210_rtc.c +++ b/hw/timer/exynos4210_rtc.c @@ -26,6 +26,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/sysbus.h" #include "qemu/timer.h" #include "qemu-common.h" @@ -370,9 +371,9 @@ static uint64_t exynos4210_rtc_read(void *opaque, hwaddr offset, break; default: - fprintf(stderr, - "[exynos4210.rtc: bad read offset " TARGET_FMT_plx "]\n", - offset); + qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.rtc: bad read offset " TARGET_FMT_plx, + offset); break; } return value; @@ -433,9 +434,9 @@ static void exynos4210_rtc_write(void *opaque, hwaddr offset, if (value > TICNT_THRESHOLD) { s->reg_ticcnt = value; } else { - fprintf(stderr, - "[exynos4210.rtc: bad TICNT value %u ]\n", - (uint32_t)value); + qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.rtc: bad TICNT value %u", + (uint32_t)value); } break; @@ -500,9 +501,9 @@ static void exynos4210_rtc_write(void *opaque, hwaddr offset, break; default: - fprintf(stderr, - "[exynos4210.rtc: bad write offset " TARGET_FMT_plx "]\n", - offset); + qemu_log_mask(LOG_GUEST_ERROR, + "exynos4210.rtc: bad write offset " TARGET_FMT_plx, + offset); break; } diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 24f1608b4b..5939b273b9 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -762,9 +762,7 @@ static void usb_set_attached(Object *obj, bool value, Error **errp) if (value) { usb_device_attach(dev, &err); - if (err) { - error_propagate(errp, err); - } + error_propagate(errp, err); } else { 
usb_device_detach(dev); } diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index f0af852709..a2d3143bf4 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -2063,7 +2063,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) { XHCIState *xhci = epctx->xhci; - XHCIStreamContext *stctx; + XHCIStreamContext *stctx = NULL; XHCITransfer *xfer; XHCIRing *ring; USBEndpoint *ep = NULL; @@ -2186,6 +2186,8 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) break; } } + /* update ring dequeue ptr */ + xhci_set_ep_state(xhci, epctx, stctx, epctx->state); epctx->kick_active--; ep = xhci_epid_to_usbep(epctx); diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index c9876a5b0f..f9c8eafe06 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -159,7 +159,10 @@ static void usb_host_attach_kernel(USBHostDevice *s); #define BULK_TIMEOUT 0 /* unlimited */ #define INTR_TIMEOUT 0 /* unlimited */ -#if LIBUSBX_API_VERSION >= 0x01000103 +#ifndef LIBUSB_API_VERSION +# define LIBUSB_API_VERSION LIBUSBX_API_VERSION +#endif +#if LIBUSB_API_VERSION >= 0x01000103 # define HAVE_STREAMS 1 #endif @@ -269,7 +272,7 @@ static int usb_host_get_port(libusb_device *dev, char *port, size_t len) size_t off; int rc, i; -#if LIBUSBX_API_VERSION >= 0x01000102 +#if LIBUSB_API_VERSION >= 0x01000102 rc = libusb_get_port_numbers(dev, path, 7); #else rc = libusb_get_port_path(ctx, dev, path, 7); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index f3ba9b9007..6b33b9f55d 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -478,8 +478,13 @@ static void vfio_listener_region_add(MemoryListener *listener, giommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; giommu->container = container; - giommu->n.notify = vfio_iommu_map_notify; - giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL; + llend = 
int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); + iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, + IOMMU_NOTIFIER_ALL, + section->offset_within_region, + int128_get64(llend)); QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); @@ -550,7 +555,8 @@ static void vfio_listener_region_del(MemoryListener *listener, VFIOGuestIOMMU *giommu; QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { - if (giommu->iommu == section->mr) { + if (giommu->iommu == section->mr && + giommu->n.start == section->offset_within_region) { memory_region_unregister_iommu_notifier(giommu->iommu, &giommu->n); QLIST_REMOVE(giommu, giommu_next); diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index e995e32dee..349085ea12 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -660,7 +660,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr) VFIOConfigWindowQuirk *window; if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) || - !vdev->vga || nr != 5) { + !vdev->vga || nr != 5 || !vdev->bars[5].ioport) { return; } @@ -1367,45 +1367,14 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) uint16_t cmd_orig, cmd; Error *err = NULL; - /* This must be an Intel VGA device. */ - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || nr != 4) { - return; - } - - /* - * IGD is not a standard, they like to change their specs often. We - * only attempt to support back to SandBridge and we hope that newer - * devices maintain compatibility with generation 8. 
- */ - gen = igd_gen(vdev); - if (gen != 6 && gen != 8) { - error_report("IGD device %s is unsupported by IGD quirks, " - "try SandyBridge or newer", vdev->vbasedev.name); - return; - } - - /* - * Regardless of running in UPT or legacy mode, the guest graphics - * driver may attempt to use stolen memory, however only legacy mode - * has BIOS support for reserving stolen memory in the guest VM. - * Emulate the GMCH register in all cases and zero out the stolen - * memory size here. Legacy mode may request allocation and re-write - * this below. - */ - gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); - gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8)); - - /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); - pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); - /* - * This must be at address 00:02.0 for us to even onsider enabling - * legacy mode. The vBIOS has dependencies on the PCI bus address. + * This must be an Intel VGA device at address 00:02.0 for us to even + * consider enabling legacy mode. The vBIOS has dependencies on the + * PCI bus address. */ - if (&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev) || nr != 4 || + &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0))) { return; } @@ -1425,6 +1394,18 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) } /* + * IGD is not a standard, they like to change their specs often. We + * only attempt to support back to SandBridge and we hope that newer + * devices maintain compatibility with generation 8. 
+ */ + gen = igd_gen(vdev); + if (gen != 6 && gen != 8) { + error_report("IGD device %s is unsupported in legacy mode, " + "try SandyBridge or newer", vdev->vbasedev.name); + return; + } + + /* * Most of what we're doing here is to enable the ROM to run, so if * there's no ROM, there's no point in setting up this quirk. * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support. @@ -1479,6 +1460,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) goto out; } + gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); + /* * If IGD VGA Disable is clear (expected) and VGA is not already enabled, * try to enable it. Probably shouldn't be using legacy mode without VGA, @@ -1549,11 +1532,12 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused, * so let's not waste VM memory for it. */ + gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8)); + if (vdev->igd_gms) { if (vdev->igd_gms <= 0x10) { gms_mb = vdev->igd_gms * 32; gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8); - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); } else { error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms); vdev->igd_gms = 0; @@ -1573,6 +1557,11 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); + /* GMCH is read-only, emulated */ + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); + pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); + /* BDSM is read-write, emulated. 
The BIOS needs to be able to write it */ pci_set_long(vdev->pdev.config + IGD_BDSM, 0); pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 6926eedd3f..1f7a7c1ae1 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -11,8 +11,11 @@ virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" # hw/virtio/virtio-rng.c virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready" +virtio_rng_cpu_is_stopped(void *rng, int size) "rng %p: cpu is stopped, dropping %d bytes" +virtio_rng_popped(void *rng) "rng %p: elem popped" virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed" virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left" +virtio_rng_vm_state_change(void *rng, int running, int state) "rng %p: state change to running %d state %d" # hw/virtio/virtio-balloon.c # diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index febe519bbd..0001e60b77 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -425,10 +425,8 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) static int vhost_dev_has_iommu(struct vhost_dev *dev) { VirtIODevice *vdev = dev->vdev; - AddressSpace *dma_as = vdev->dma_as; - return memory_region_is_iommu(dma_as->root) && - virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); } static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, @@ -720,6 +718,70 @@ static void vhost_region_del(MemoryListener *listener, } } +static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n); + struct vhost_dev *hdev = iommu->hdev; + hwaddr iova = iotlb->iova + iommu->iommu_offset; + + if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, iova, + iotlb->addr_mask + 1)) { + error_report("Fail to invalidate device iotlb"); + } +} + 
+static void vhost_iommu_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + iommu_listener); + struct vhost_iommu *iommu; + Int128 end; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + + iommu = g_malloc0(sizeof(*iommu)); + end = int128_add(int128_make64(section->offset_within_region), + section->size); + end = int128_sub(end, int128_one()); + iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, + IOMMU_NOTIFIER_UNMAP, + section->offset_within_region, + int128_get64(end)); + iommu->mr = section->mr; + iommu->iommu_offset = section->offset_within_address_space - + section->offset_within_region; + iommu->hdev = dev; + memory_region_register_iommu_notifier(section->mr, &iommu->n); + QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next); + /* TODO: can replay help performance here? */ +} + +static void vhost_iommu_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + iommu_listener); + struct vhost_iommu *iommu; + + if (!memory_region_is_iommu(section->mr)) { + return; + } + + QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) { + if (iommu->mr == section->mr && + iommu->n.start == section->offset_within_region) { + memory_region_unregister_iommu_notifier(iommu->mr, + &iommu->n); + QLIST_REMOVE(iommu, iommu_next); + g_free(iommu); + break; + } + } +} + static void vhost_region_nop(MemoryListener *listener, MemoryRegionSection *section) { @@ -1161,17 +1223,6 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } -static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) -{ - struct vhost_dev *hdev = container_of(n, struct vhost_dev, n); - - if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, - iotlb->iova, - iotlb->addr_mask + 1)) { - error_report("Fail to invalidate device iotlb"); - } -} - 
int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout) { @@ -1244,8 +1295,10 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .priority = 10 }; - hdev->n.notify = vhost_iommu_unmap_notify; - hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; + hdev->iommu_listener = (MemoryListener) { + .region_add = vhost_iommu_region_add, + .region_del = vhost_iommu_region_del, + }; if (hdev->migration_blocker == NULL) { if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { @@ -1455,8 +1508,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } if (vhost_dev_has_iommu(hdev)) { - memory_region_register_iommu_notifier(vdev->dma_as->root, - &hdev->n); + memory_listener_register(&hdev->iommu_listener, vdev->dma_as); } r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); @@ -1538,8 +1590,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) if (vhost_dev_has_iommu(hdev)) { hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); - memory_region_unregister_iommu_notifier(vdev->dma_as->root, - &hdev->n); + memory_listener_unregister(&hdev->iommu_listener); } vhost_log_put(hdev, true); hdev->started = false; diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index a886011e75..3042232daf 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "hw/hw.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "hw/qdev.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio.h" @@ -48,20 +49,33 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + Error *local_err = NULL; DPRINTF("%s: plug device.\n", qbus->name); if (klass->pre_plugged != NULL) { - klass->pre_plugged(qbus->parent, errp); + 
klass->pre_plugged(qbus->parent, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } /* Get the features of the plugged device. */ assert(vdc->get_features != NULL); vdev->host_features = vdc->get_features(vdev, vdev->host_features, - errp); + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } if (klass->device_plugged != NULL) { - klass->device_plugged(qbus->parent, errp); + klass->device_plugged(qbus->parent, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + return; } if (klass->get_dma_as != NULL && has_iommu) { diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index 9639f4e89b..a6ee501051 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -53,6 +53,15 @@ static void chr_read(void *opaque, const void *buf, size_t size) return; } + /* we can't modify the virtqueue until + * our state is fully synced + */ + + if (!runstate_check(RUN_STATE_RUNNING)) { + trace_virtio_rng_cpu_is_stopped(vrng, size); + return; + } + vrng->quota_remaining -= size; offset = 0; @@ -61,6 +70,7 @@ static void chr_read(void *opaque, const void *buf, size_t size) if (!elem) { break; } + trace_virtio_rng_popped(vrng); len = iov_from_buf(elem->in_sg, elem->in_num, 0, buf + offset, size - offset); offset += len; @@ -120,17 +130,21 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) return f; } -static int virtio_rng_post_load(void *opaque, int version_id) +static void virtio_rng_vm_state_change(void *opaque, int running, + RunState state) { VirtIORNG *vrng = opaque; + trace_virtio_rng_vm_state_change(vrng, running, state); + /* We may have an element ready but couldn't process it due to a quota - * limit. Make sure to try again after live migration when the quota may - * have been reset. + * limit or because CPU was stopped. Make sure to try again when the + * CPU restart. 
*/ - virtio_rng_process(vrng); - return 0; + if (running && is_guest_ready(vrng)) { + virtio_rng_process(vrng); + } } static void check_rate_limit(void *opaque) @@ -198,6 +212,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, check_rate_limit, vrng); vrng->activate_timer = true; + + vrng->vmstate = qemu_add_vm_change_state_handler(virtio_rng_vm_state_change, + vrng); } static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) @@ -205,6 +222,7 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIORNG *vrng = VIRTIO_RNG(dev); + qemu_del_vm_change_state_handler(vrng->vmstate); timer_del(vrng->rate_limit_timer); timer_free(vrng->rate_limit_timer); virtio_cleanup(vdev); @@ -218,7 +236,6 @@ static const VMStateDescription vmstate_virtio_rng = { VMSTATE_VIRTIO_DEVICE, VMSTATE_END_OF_LIST() }, - .post_load = virtio_rng_post_load, }; static Property virtio_rng_properties[] = { diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 82b6060b2a..03592c542a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1528,7 +1528,18 @@ static void virtio_queue_notify_vq(VirtQueue *vq) void virtio_queue_notify(VirtIODevice *vdev, int n) { - virtio_queue_notify_vq(&vdev->vq[n]); + VirtQueue *vq = &vdev->vq[n]; + + if (unlikely(!vq->vring.desc || vdev->broken)) { + return; + } + + trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); + if (vq->handle_aio_output) { + event_notifier_set(&vq->host_notifier); + } else if (vq->handle_output) { + vq->handle_output(vdev, vq); + } } uint16_t virtio_queue_vector(VirtIODevice *vdev, int n) diff --git a/include/block/aio.h b/include/block/aio.h index 677b6ffc25..406e32305a 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -511,6 +511,15 @@ void aio_co_schedule(AioContext *ctx, struct Coroutine *co); void aio_co_wake(struct Coroutine *co); /** + * aio_co_enter: + * 
@ctx: the context to run the coroutine + * @co: the coroutine to run + * + * Enter a coroutine in the specified AioContext. + */ +void aio_co_enter(AioContext *ctx, struct Coroutine *co); + +/** * Return the AioContext whose event loop runs in the current thread. * * If called from an IOThread this will be the IOThread's AioContext. If diff --git a/include/block/block.h b/include/block/block.h index 5149260827..144df0ddfb 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -366,6 +366,8 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); void bdrv_invalidate_cache_all(Error **errp); int bdrv_inactivate_all(void); +void blk_resume_after_migration(Error **errp); + /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); @@ -379,12 +381,13 @@ void bdrv_drain_all(void); #define BDRV_POLL_WHILE(bs, cond) ({ \ bool waited_ = false; \ + bool busy_ = true; \ BlockDriverState *bs_ = (bs); \ AioContext *ctx_ = bdrv_get_aio_context(bs_); \ if (aio_context_in_iothread(ctx_)) { \ - while ((cond)) { \ - aio_poll(ctx_, true); \ - waited_ = true; \ + while ((cond) || busy_) { \ + busy_ = aio_poll(ctx_, (cond)); \ + waited_ |= !!(cond) | busy_; \ } \ } else { \ assert(qemu_get_current_aio_context() == \ @@ -396,11 +399,16 @@ void bdrv_drain_all(void); */ \ assert(!bs_->wakeup); \ bs_->wakeup = true; \ - while ((cond)) { \ - aio_context_release(ctx_); \ - aio_poll(qemu_get_aio_context(), true); \ - aio_context_acquire(ctx_); \ - waited_ = true; \ + while (busy_) { \ + if ((cond)) { \ + waited_ = busy_ = true; \ + aio_context_release(ctx_); \ + aio_poll(qemu_get_aio_context(), true); \ + aio_context_acquire(ctx_); \ + } else { \ + busy_ = aio_poll(ctx_, false); \ + waited_ |= busy_; \ + } \ } \ bs_->wakeup = false; \ } \ @@ -426,6 +434,8 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t sector_num, int nb_sectors, int *pnum); bool 
bdrv_is_read_only(BlockDriverState *bs); +int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); +int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp); bool bdrv_is_sg(BlockDriverState *bs); bool bdrv_is_inserted(BlockDriverState *bs); int bdrv_media_changed(BlockDriverState *bs); @@ -501,7 +511,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, - Error **errp, bool quiet); + bool quiet, Error **errp); /* Returns the alignment in bytes that is required so that no bounce buffer * is required throughout the stack */ @@ -556,6 +566,11 @@ bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); AioContext *bdrv_get_aio_context(BlockDriverState *bs); /** + * Transfer control to @co in the aio context of @bs + */ +void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); + +/** * bdrv_set_aio_context: * * Changes the #AioContext used for fd handlers, timers, and BHs by this @@ -571,6 +586,22 @@ void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); /** + * bdrv_parent_drained_begin: + * + * Begin a quiesced section of all users of @bs. This is part of + * bdrv_drained_begin. + */ +void bdrv_parent_drained_begin(BlockDriverState *bs); + +/** + * bdrv_parent_drained_end: + * + * End a quiesced section of all users of @bs. This is part of + * bdrv_drained_end. + */ +void bdrv_parent_drained_end(BlockDriverState *bs); + +/** * bdrv_drained_begin: * * Begin a quiesced section for exclusive access to the BDS, by disabling diff --git a/include/block/block_int.h b/include/block/block_int.h index 59400bd848..4f8cd29ae4 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -805,16 +805,16 @@ void commit_start(const char *job_id, BlockDriverState *bs, * a node name should be autogenerated. 
* @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. - * @errp: Error object. * @auto_complete: Auto complete the job. + * @errp: Error object. * */ void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, const char *filter_node_name, - BlockCompletionFunc *cb, void *opaque, Error **errp, - bool auto_complete); + BlockCompletionFunc *cb, void *opaque, + bool auto_complete, Error **errp); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the diff --git a/include/crypto/block.h b/include/crypto/block.h index b6971de921..4a053a3ffa 100644 --- a/include/crypto/block.h +++ b/include/crypto/block.h @@ -30,23 +30,23 @@ typedef struct QCryptoBlock QCryptoBlock; * and QCryptoBlockOpenOptions in qapi/crypto.json */ typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block, + void *opaque, size_t offset, uint8_t *buf, size_t buflen, - Error **errp, - void *opaque); + Error **errp); typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block, + void *opaque, size_t headerlen, - Error **errp, - void *opaque); + Error **errp); typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block, + void *opaque, size_t offset, const uint8_t *buf, size_t buflen, - Error **errp, - void *opaque); + Error **errp); /** * qcrypto_block_has_format: diff --git a/include/exec/memory.h b/include/exec/memory.h index e39256ad03..99e0f54d86 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -55,6 +55,8 @@ typedef enum { IOMMU_RW = 3, } IOMMUAccessFlags; +#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? 
IOMMU_WO : 0)) + struct IOMMUTLBEntry { AddressSpace *target_as; hwaddr iova; @@ -77,13 +79,30 @@ typedef enum { #define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) +struct IOMMUNotifier; +typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier, + IOMMUTLBEntry *data); + struct IOMMUNotifier { - void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data); + IOMMUNotify notify; IOMMUNotifierFlag notifier_flags; + /* Notify for address space range start <= addr <= end */ + hwaddr start; + hwaddr end; QLIST_ENTRY(IOMMUNotifier) node; }; typedef struct IOMMUNotifier IOMMUNotifier; +static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end) +{ + n->notify = fn; + n->notifier_flags = flags; + n->start = start; + n->end = end; +} + /* New-style MMIO accessors can indicate that the transaction failed. * A zero (MEMTX_OK) response means success; anything else is a failure * of some kind. The memory subsystem will bitwise-OR together results @@ -174,6 +193,8 @@ struct MemoryRegionIOMMUOps { void (*notify_flag_changed)(MemoryRegion *iommu, IOMMUNotifierFlag old_flags, IOMMUNotifierFlag new_flags); + /* Set this up to provide customized IOMMU replay function */ + void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; @@ -222,6 +243,9 @@ struct MemoryRegion { IOMMUNotifierFlag iommu_notify_flags; }; +#define IOMMU_NOTIFIER_FOREACH(n, mr) \ + QLIST_FOREACH((n), &(mr)->iommu_notify, node) + /** * MemoryListener: callbacks structure for updates to the physical memory map * @@ -668,6 +692,21 @@ void memory_region_notify_iommu(MemoryRegion *mr, IOMMUTLBEntry entry); /** + * memory_region_notify_one: notify a change in an IOMMU translation + * entry to a single notifier + * + * This works just like memory_region_notify_iommu(), but it only + * notifies a specific notifier, not all of them. 
+ * + * @notifier: the notifier to be notified + * @entry: the new entry in the IOMMU translation table. The entry + * replaces all old entries for the same virtual I/O address range. + * Deleted entries have .@perm == 0. + */ +void memory_region_notify_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry); + +/** * memory_region_register_iommu_notifier: register a notifier for changes to * IOMMU translation entries. * @@ -693,6 +732,14 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, bool is_write); /** + * memory_region_iommu_replay_all: replay existing IOMMU translations + * to all the notifiers registered. + * + * @mr: the memory region to observe + */ +void memory_region_iommu_replay_all(MemoryRegion *mr); + +/** * memory_region_unregister_iommu_notifier: unregister a notifier for * changes to IOMMU translation entries. * @@ -871,6 +918,53 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, */ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client); + +/** + * memory_region_snapshot_and_clear_dirty: Get a snapshot of the dirty + * bitmap and clear it. + * + * Creates a snapshot of the dirty bitmap, clears the dirty bitmap and + * returns the snapshot. The snapshot can then be used to query dirty + * status, using memory_region_snapshot_get_dirty. Unlike + * memory_region_test_and_clear_dirty this allows to query the same + * page multiple times, which is especially useful for display updates + * where the scanlines often are not page aligned. + * + * The dirty bitmap region which gets copyed into the snapshot (and + * cleared afterwards) can be larger than requested. The boundaries + * are rounded up/down so complete bitmap longs (covering 64 pages on + * 64bit hosts) can be copied over into the bitmap snapshot. 
Which + * isn't a problem for display updates as the extra pages are outside + * the visible area, and in case the visible area changes a full + * display redraw is due anyway. Should other use cases for this + * function emerge we might have to revisit this implementation + * detail. + * + * Use g_free to release DirtyBitmapSnapshot. + * + * @mr: the memory region being queried. + * @addr: the address (relative to the start of the region) being queried. + * @size: the size of the range being queried. + * @client: the user of the logging information; typically %DIRTY_MEMORY_VGA. + */ +DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, + hwaddr addr, + hwaddr size, + unsigned client); + +/** + * memory_region_snapshot_get_dirty: Check whether a range of bytes is dirty + * in the specified dirty bitmap snapshot. + * + * @mr: the memory region being queried. + * @snap: the dirty bitmap snapshot + * @addr: the address (relative to the start of the region) being queried. + * @size: the size of the range being queried. + */ +bool memory_region_snapshot_get_dirty(MemoryRegion *mr, + DirtyBitmapSnapshot *snap, + hwaddr addr, hwaddr size); + /** * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with * any external TLBs (e.g. 
kvm) @@ -1426,13 +1520,11 @@ void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val); struct MemoryRegionCache { hwaddr xlat; - void *ptr; hwaddr len; - MemoryRegion *mr; - bool is_write; + AddressSpace *as; }; -#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mr = NULL }) +#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .as = NULL }) /* address_space_cache_init: prepare for repeated access to a physical * memory region @@ -1688,7 +1780,7 @@ address_space_read_cached(MemoryRegionCache *cache, hwaddr addr, void *buf, int len) { assert(addr < cache->len && len <= cache->len - addr); - memcpy(buf, cache->ptr + addr, len); + address_space_read(cache->as, cache->xlat + addr, MEMTXATTRS_UNSPECIFIED, buf, len); } /** @@ -1704,7 +1796,7 @@ address_space_write_cached(MemoryRegionCache *cache, hwaddr addr, void *buf, int len) { assert(addr < cache->len && len <= cache->len - addr); - memcpy(cache->ptr + addr, buf, len); + address_space_write(cache->as, cache->xlat + addr, MEMTXATTRS_UNSPECIFIED, buf, len); } #endif diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 29647303b0..dbe2f08d47 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -53,7 +53,7 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) } long qemu_getrampagesize(void); -ram_addr_t last_ram_offset(void); +unsigned long last_ram_page(void); RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, bool share, const char *mem_path, Error **errp); @@ -343,6 +343,13 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, ram_addr_t length, unsigned client); +DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty + (ram_addr_t start, ram_addr_t length, unsigned client); + +bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap, + ram_addr_t start, + ram_addr_t length); + static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, ram_addr_t length) { @@ -354,11 +361,13 
@@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, static inline uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, + RAMBlock *rb, ram_addr_t start, ram_addr_t length, - int64_t *real_dirty_pages) + uint64_t *real_dirty_pages) { ram_addr_t addr; + start = rb->offset + start; unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); uint64_t num_dirty = 0; @@ -411,7 +420,5 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, return num_dirty; } - -void migration_bitmap_extend(ram_addr_t old, ram_addr_t new); #endif #endif diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h index db7fa0e633..7beb9592fb 100644 --- a/include/hw/acpi/vmgenid.h +++ b/include/hw/acpi/vmgenid.h @@ -21,8 +21,10 @@ typedef struct VmGenIdState { DeviceClass parent_obj; QemuUUID guid; /* The 128-bit GUID seen by the guest */ uint8_t vmgenid_addr_le[8]; /* Address of the GUID (little-endian) */ + bool write_pointer_available; } VmGenIdState; +/* returns NULL unless there is exactly one device */ static inline Object *find_vmgenid_dev(void) { return object_resolve_path_type("", VMGENID_DEVICE, NULL); diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h index dbec0c1598..4c5fc66a1e 100644 --- a/include/hw/arm/aspeed_soc.h +++ b/include/hw/arm/aspeed_soc.h @@ -20,6 +20,7 @@ #include "hw/i2c/aspeed_i2c.h" #include "hw/ssi/aspeed_smc.h" #include "hw/watchdog/wdt_aspeed.h" +#include "hw/net/ftgmac100.h" #define ASPEED_SPIS_NUM 2 @@ -39,6 +40,7 @@ typedef struct AspeedSoCState { AspeedSMCState spi[ASPEED_SPIS_NUM]; AspeedSDMCState sdmc; AspeedWDTState wdt; + FTGMAC100State ftgmac100; } AspeedSoCState; #define TYPE_ASPEED_SOC "aspeed-soc" diff --git a/include/hw/boards.h b/include/hw/boards.h index 269d0ba399..31d9c72fb0 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -135,6 +135,7 @@ struct MachineClass { bool rom_file_has_mr; int minimum_page_bits; bool has_hotpluggable_cpus; + int 
numa_mem_align_shift; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); diff --git a/include/hw/compat.h b/include/hw/compat.h index fc8c3e0600..846b90eb67 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -1,6 +1,9 @@ #ifndef HW_COMPAT_H #define HW_COMPAT_H +#define HW_COMPAT_2_9 \ + /* empty */ + #define HW_COMPAT_2_8 \ {\ .driver = "fw_cfg_mem",\ @@ -131,6 +134,10 @@ .driver = "fw_cfg_io",\ .property = "dma_enabled",\ .value = "off",\ + },{\ + .driver = "vmgenid",\ + .property = "x-write-pointer-available",\ + .value = "off",\ }, #define HW_COMPAT_2_3 \ diff --git a/include/hw/devices.h b/include/hw/devices.h index 7475b714de..861ddea8af 100644 --- a/include/hw/devices.h +++ b/include/hw/devices.h @@ -62,9 +62,4 @@ void tc6393xb_gpio_out_set(TC6393xbState *s, int line, qemu_irq *tc6393xb_gpio_in_get(TC6393xbState *s); qemu_irq tc6393xb_l3v_get(TC6393xbState *s); -/* sm501.c */ -void sm501_init(struct MemoryRegion *address_space_mem, uint32_t base, - uint32_t local_mem_bytes, qemu_irq irq, - Chardev *chr); - #endif diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 20ad28c95b..1209eb483a 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -189,8 +189,6 @@ struct APICCommonState { DeviceState *vapic; hwaddr vapic_paddr; /* note: persistence via kvmvapic */ bool legacy_instance_id; - - int apic_irq_delivered; /* for saving static variable */ }; typedef struct VAPICState { diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index 18dcca7ebc..673d13d28f 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -21,7 +21,6 @@ void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled); I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base); void ich9_generate_smi(void); -void ich9_generate_nmi(void); #define ICH9_CC_SIZE (16 * 1024) /* 16KB. 
Chipset configuration registers */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index fe645aa93a..3e51876b75 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; typedef struct VTDIrq VTDIrq; typedef struct VTD_MSIMessage VTD_MSIMessage; +typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode; /* Context-Entry */ struct VTDContextEntry { @@ -83,6 +84,8 @@ struct VTDAddressSpace { uint8_t devfn; AddressSpace as; MemoryRegion iommu; + MemoryRegion root; + MemoryRegion sys_alias; MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */ IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; @@ -247,6 +250,11 @@ struct VTD_MSIMessage { /* When IR is enabled, all MSI/MSI-X data bits should be zero */ #define VTD_IR_MSI_DATA (0) +struct IntelIOMMUNotifierNode { + VTDAddressSpace *vtd_as; + QLIST_ENTRY(IntelIOMMUNotifierNode) next; +}; + /* The iommu (DMAR) device state struct */ struct IntelIOMMUState { X86IOMMUState x86_iommu; @@ -284,6 +292,8 @@ struct IntelIOMMUState { MemoryRegionIOMMUOps iommu_ops; GHashTable *vtd_as_by_busptr; /* VTDBus objects indexed by PCIBus* reference */ VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ + /* list of registered notifiers */ + QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list; /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h index c469ffe69b..35de622063 100644 --- a/include/hw/net/cadence_gem.h +++ b/include/hw/net/cadence_gem.h @@ -50,6 +50,7 @@ typedef struct CadenceGEMState { uint8_t num_priority_queues; uint8_t num_type1_screeners; uint8_t num_type2_screeners; + uint32_t revision; /* GEM registers backing store */ uint32_t regs[CADENCE_GEM_MAXREG]; diff --git a/include/hw/net/ftgmac100.h 
b/include/hw/net/ftgmac100.h new file mode 100644 index 0000000000..d9bc589fbf --- /dev/null +++ b/include/hw/net/ftgmac100.h @@ -0,0 +1,64 @@ +/* + * Faraday FTGMAC100 Gigabit Ethernet + * + * Copyright (C) 2016-2017, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef FTGMAC100_H +#define FTGMAC100_H + +#define TYPE_FTGMAC100 "ftgmac100" +#define FTGMAC100(obj) OBJECT_CHECK(FTGMAC100State, (obj), TYPE_FTGMAC100) + +#include "hw/sysbus.h" +#include "net/net.h" + +typedef struct FTGMAC100State { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + NICState *nic; + NICConf conf; + qemu_irq irq; + MemoryRegion iomem; + + uint8_t *frame; + + uint32_t irq_state; + uint32_t isr; + uint32_t ier; + uint32_t rx_enabled; + uint32_t rx_ring; + uint32_t rx_descriptor; + uint32_t tx_ring; + uint32_t tx_descriptor; + uint32_t math[2]; + uint32_t rbsr; + uint32_t itc; + uint32_t aptcr; + uint32_t dblac; + uint32_t revr; + uint32_t fear1; + uint32_t tpafcr; + uint32_t maccr; + uint32_t phycr; + uint32_t phydata; + uint32_t fcr; + + + uint32_t phy_status; + uint32_t phy_control; + uint32_t phy_advertise; + uint32_t phy_int; + uint32_t phy_int_mask; + + bool aspeed; + uint32_t txdes0_edotr; + uint32_t rxdes0_edorr; +} FTGMAC100State; + +#endif diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h index 9fdd7bbe75..6ce48a6d78 100644 --- a/include/hw/net/mii.h +++ b/include/hw/net/mii.h @@ -22,13 +22,20 @@ #define MII_H /* PHY registers */ -#define MII_BMCR 0 -#define MII_BMSR 1 -#define MII_PHYID1 2 -#define MII_PHYID2 3 -#define MII_ANAR 4 -#define MII_ANLPAR 5 -#define MII_ANER 6 +#define MII_BMCR 0 /* Basic mode control register */ +#define MII_BMSR 1 /* Basic mode status register */ +#define MII_PHYID1 2 /* ID register 1 */ +#define MII_PHYID2 3 /* ID register 2 */ +#define MII_ANAR 4 /* Autonegotiation advertisement */ +#define MII_ANLPAR 5 /* Autonegotiation lnk 
partner abilities */ +#define MII_ANER 6 /* Autonegotiation expansion */ +#define MII_ANNP 7 /* Autonegotiation next page */ +#define MII_ANLPRNP 8 /* Autonegotiation link partner rx next page */ +#define MII_CTRL1000 9 /* 1000BASE-T control */ +#define MII_STAT1000 10 /* 1000BASE-T status */ +#define MII_MDDACR 13 /* MMD access control */ +#define MII_MDDAADR 14 /* MMD access address data */ +#define MII_EXTSTAT 15 /* Extended Status */ #define MII_NSR 16 #define MII_LBREMR 17 #define MII_REC 18 @@ -38,19 +45,33 @@ /* PHY registers fields */ #define MII_BMCR_RESET (1 << 15) #define MII_BMCR_LOOPBACK (1 << 14) -#define MII_BMCR_SPEED (1 << 13) -#define MII_BMCR_AUTOEN (1 << 12) -#define MII_BMCR_FD (1 << 8) +#define MII_BMCR_SPEED100 (1 << 13) /* LSB of Speed (100) */ +#define MII_BMCR_SPEED MII_BMCR_SPEED100 +#define MII_BMCR_AUTOEN (1 << 12) /* Autonegotiation enable */ +#define MII_BMCR_PDOWN (1 << 11) /* Enable low power state */ +#define MII_BMCR_ISOLATE (1 << 10) /* Isolate data paths from MII */ +#define MII_BMCR_ANRESTART (1 << 9) /* Auto negotiation restart */ +#define MII_BMCR_FD (1 << 8) /* Set duplex mode */ +#define MII_BMCR_CTST (1 << 7) /* Collision test */ +#define MII_BMCR_SPEED1000 (1 << 6) /* MSB of Speed (1000) */ -#define MII_BMSR_100TX_FD (1 << 14) -#define MII_BMSR_100TX_HD (1 << 13) -#define MII_BMSR_10T_FD (1 << 12) -#define MII_BMSR_10T_HD (1 << 11) -#define MII_BMSR_MFPS (1 << 6) -#define MII_BMSR_AN_COMP (1 << 5) -#define MII_BMSR_AUTONEG (1 << 3) -#define MII_BMSR_LINK_ST (1 << 2) +#define MII_BMSR_100TX_FD (1 << 14) /* Can do 100mbps, full-duplex */ +#define MII_BMSR_100TX_HD (1 << 13) /* Can do 100mbps, half-duplex */ +#define MII_BMSR_10T_FD (1 << 12) /* Can do 10mbps, full-duplex */ +#define MII_BMSR_10T_HD (1 << 11) /* Can do 10mbps, half-duplex */ +#define MII_BMSR_100T2_FD (1 << 10) /* Can do 100mbps T2, full-duplex */ +#define MII_BMSR_100T2_HD (1 << 9) /* Can do 100mbps T2, half-duplex */ +#define MII_BMSR_EXTSTAT (1 << 8) /* 
Extended status in register 15 */ +#define MII_BMSR_MFPS (1 << 6) /* MII Frame Preamble Suppression */ +#define MII_BMSR_AN_COMP (1 << 5) /* Auto-negotiation complete */ +#define MII_BMSR_RFAULT (1 << 4) /* Remote fault */ +#define MII_BMSR_AUTONEG (1 << 3) /* Able to do auto-negotiation */ +#define MII_BMSR_LINK_ST (1 << 2) /* Link status */ +#define MII_BMSR_JABBER (1 << 1) /* Jabber detected */ +#define MII_BMSR_EXTCAP (1 << 0) /* Ext-reg capability */ +#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymetric pause */ +#define MII_ANAR_PAUSE (1 << 10) /* Try for pause */ #define MII_ANAR_TXFD (1 << 8) #define MII_ANAR_TX (1 << 7) #define MII_ANAR_10FD (1 << 6) @@ -58,17 +79,31 @@ #define MII_ANAR_CSMACD (1 << 0) #define MII_ANLPAR_ACK (1 << 14) +#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */ +#define MII_ANLPAR_PAUSE (1 << 10) /* can pause */ #define MII_ANLPAR_TXFD (1 << 8) #define MII_ANLPAR_TX (1 << 7) #define MII_ANLPAR_10FD (1 << 6) #define MII_ANLPAR_10 (1 << 5) #define MII_ANLPAR_CSMACD (1 << 0) +#define MII_ANER_NWAY (1 << 0) /* Can do N-way auto-nego */ + +#define MII_CTRL1000_FULL (1 << 9) /* 1000BASE-T full duplex */ +#define MII_CTRL1000_HALF (1 << 8) /* 1000BASE-T half duplex */ + +#define MII_STAT1000_FULL (1 << 11) /* 1000BASE-T full duplex */ +#define MII_STAT1000_HALF (1 << 10) /* 1000BASE-T half duplex */ + /* List of vendor identifiers */ /* RealTek 8201 */ #define RTL8201CP_PHYID1 0x0000 #define RTL8201CP_PHYID2 0x8201 +/* RealTek 8211E */ +#define RTL8211E_PHYID1 0x001c +#define RTL8211E_PHYID2 0xc915 + /* National Semiconductor DP83848 */ #define DP83848_PHYID1 0x2000 #define DP83848_PHYID2 0x5c90 diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index d22ad8dd3b..3752ddc93a 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -207,6 +207,9 @@ #define PCI_VENDOR_ID_MARVELL 0x11ab +#define PCI_VENDOR_ID_SILICON_MOTION 0x126f +#define PCI_DEVICE_ID_SM501 0x0501 + #define 
PCI_VENDOR_ID_ENSONIQ 0x1274 #define PCI_DEVICE_ID_ENSONIQ_ES1370 0x5000 diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h index fa531d5c26..5524247cdc 100644 --- a/include/hw/ppc/spapr_drc.h +++ b/include/hw/ppc/spapr_drc.h @@ -154,6 +154,7 @@ typedef struct sPAPRDRConnector { bool awaiting_release; bool signalled; bool awaiting_allocation; + bool awaiting_allocation_skippable; /* device pointer, via link property */ DeviceState *dev; diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index b44b476765..4bf86b0ad8 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -113,19 +113,6 @@ typedef struct DeviceClass { * TODO remove once we're there */ bool cannot_instantiate_with_device_add_yet; - /* - * Does this device model survive object_unref(object_new(TNAME))? - * All device models should, and this flag shouldn't exist. Some - * devices crash in object_new(), some crash or hang in - * object_unref(). Makes introspecting properties with - * qmp_device_list_properties() dangerous. Bad, because it's used - * by -device FOO,help. This flag serves to protect that code. - * It should never be set without a comment explaining why it is - * set. 
- * TODO remove once we're there - */ - bool cannot_destroy_with_object_finalize_yet; - bool hotpluggable; /* callbacks */ @@ -386,7 +373,8 @@ Object *qdev_get_machine(void); /* FIXME: make this a link<> */ void qdev_set_parent_bus(DeviceState *dev, BusState *bus); -extern int qdev_hotplug; +extern bool qdev_hotplug; +extern bool qdev_hot_removed; char *qdev_get_dev_path(DeviceState *dev); diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 7ac315331a..1d69fa7a8f 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -188,7 +188,8 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, Chardev *value); void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value); void qdev_prop_set_drive(DeviceState *dev, const char *name, BlockBackend *value, Error **errp); -void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value); +void qdev_prop_set_macaddr(DeviceState *dev, const char *name, + const uint8_t *value); void qdev_prop_set_enum(DeviceState *dev, const char *name, int value); /* FIXME: Remove opaque pointer properties. 
*/ void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value); diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index c96c862057..f1f0d7f07a 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -23,6 +23,8 @@ #define MAX_CSSID 255 #define MAX_CHPID 255 +#define MAX_ISC 7 + #define MAX_CIWS 62 #define VIRTUAL_CSSID 0xfe @@ -124,9 +126,15 @@ void css_generate_css_crws(uint8_t cssid); void css_clear_sei_pending(void); void css_adapter_interrupt(uint8_t isc); -#define CSS_IO_ADAPTER_VIRTIO 1 -int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap, - bool maskable, uint32_t *id); +typedef enum { + CSS_IO_ADAPTER_VIRTIO = 0, + CSS_IO_ADAPTER_PCI = 1, + CSS_IO_ADAPTER_TYPE_NUMS, +} CssIoAdapterType; + +uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc); +void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable, + Error **errp); #ifndef CONFIG_USER_ONLY SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, @@ -172,6 +180,11 @@ extern PropertyInfo css_devid_propinfo; #define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId) +extern PropertyInfo css_devid_ro_propinfo; + +#define DEFINE_PROP_CSS_DEV_ID_RO(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, css_devid_ro_propinfo, CssDevId) + /** * Create a subchannel for the given bus id. 
* diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 52f633ec89..a45032163d 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -37,10 +37,20 @@ struct vhost_log { vhost_log_chunk_t *log; }; +struct vhost_dev; +struct vhost_iommu { + struct vhost_dev *hdev; + MemoryRegion *mr; + hwaddr iommu_offset; + IOMMUNotifier n; + QLIST_ENTRY(vhost_iommu) iommu_next; +}; + struct vhost_memory; struct vhost_dev { VirtIODevice *vdev; MemoryListener memory_listener; + MemoryListener iommu_listener; struct vhost_memory *mem; int n_mem_sections; MemoryRegionSection *mem_sections; @@ -64,6 +74,7 @@ struct vhost_dev { void *opaque; struct vhost_log *log; QLIST_ENTRY(vhost_dev) entry; + QLIST_HEAD(, vhost_iommu) iommu_list; IOMMUNotifier n; }; diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index f3a98a3261..f3ffdceca4 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -72,6 +72,8 @@ struct virtio_gpu_conf { uint64_t max_hostmem; uint32_t max_outputs; uint32_t flags; + uint32_t xres; + uint32_t yres; }; struct virtio_gpu_ctrl_command { diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h index 55db31087a..91df57eca4 100644 --- a/include/hw/virtio/virtio-input.h +++ b/include/hw/virtio/virtio-input.h @@ -62,7 +62,10 @@ struct VirtIOInput { VirtQueue *evt, *sts; char *serial; - virtio_input_event *queue; + struct { + virtio_input_event event; + VirtQueueElement *elem; + } *queue; uint32_t qindex, qsize; bool active; diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h index 2d40abdbdb..922dce7cac 100644 --- a/include/hw/virtio/virtio-rng.h +++ b/include/hw/virtio/virtio-rng.h @@ -45,6 +45,8 @@ typedef struct VirtIORNG { QEMUTimer *rate_limit_timer; int64_t quota_remaining; bool activate_timer; + + VMChangeStateEntry *vmstate; } VirtIORNG; #endif diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 
f536f77e68..8c8453cf19 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -121,9 +121,25 @@ typedef struct VirtIOSCSIReq { } req; } VirtIOSCSIReq; -void virtio_scsi_common_realize(DeviceState *dev, Error **errp, - VirtIOHandleOutput ctrl, VirtIOHandleOutput evt, - VirtIOHandleOutput cmd); +static inline void virtio_scsi_acquire(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_acquire(s->ctx); + } +} + +static inline void virtio_scsi_release(VirtIOSCSI *s) +{ + if (s->ctx) { + aio_context_release(s->ctx); + } +} + +void virtio_scsi_common_realize(DeviceState *dev, + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, + VirtIOHandleOutput cmd, + Error **errp); void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp); bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 15efcf2057..7b6edbafd7 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -34,7 +34,7 @@ struct VirtQueue; static inline hwaddr vring_align(hwaddr addr, unsigned long align) { - return (addr + align - 1) & ~(align - 1); + return QEMU_ALIGN_UP(addr, align); } typedef struct VirtQueue VirtQueue; diff --git a/include/migration/migration.h b/include/migration/migration.h index 5720c884f4..ba1a16cbc1 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -128,18 +128,6 @@ struct MigrationIncomingState { MigrationIncomingState *migration_incoming_get_current(void); void migration_incoming_state_destroy(void); -/* - * An outstanding page request, on the source, having been received - * and queued - */ -struct MigrationSrcPageRequest { - RAMBlock *rb; - hwaddr offset; - hwaddr len; - - QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req; -}; - struct MigrationState { size_t bytes_xfer; @@ -166,14 +154,9 @@ struct MigrationState int64_t total_time; int64_t downtime; int64_t expected_downtime; - int64_t dirty_pages_rate; - int64_t 
dirty_bytes_rate; bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; int64_t xbzrle_cache_size; int64_t setup_time; - int64_t dirty_sync_count; - /* Count of requests incoming from destination */ - int64_t postcopy_requests; /* Flag set once the migration has been asked to enter postcopy */ bool start_postcopy; @@ -186,11 +169,6 @@ struct MigrationState /* Flag set once the migration thread called bdrv_inactivate_all */ bool block_inactive; - /* Queue of outstanding page requests from the destination */ - QemuMutex src_page_req_mutex; - QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests; - /* The RAMBlock used in the last src_page_request */ - RAMBlock *last_req_rb; /* The semaphore is used to notify COLO thread that failover is finished */ QemuSemaphore colo_exit_sem; @@ -256,11 +234,11 @@ void remove_migration_state_change_notifier(Notifier *notify); MigrationState *migrate_init(const MigrationParams *params); bool migration_is_blocked(Error **errp); bool migration_in_setup(MigrationState *); -bool migration_is_idle(MigrationState *s); +bool migration_is_idle(void); bool migration_has_finished(MigrationState *); bool migration_has_failed(MigrationState *); /* True if outgoing migration has entered postcopy phase */ -bool migration_in_postcopy(MigrationState *); +bool migration_in_postcopy(void); /* ...and after the device transmission */ bool migration_in_postcopy_after_devices(MigrationState *); MigrationState *migrate_get_current(void); @@ -272,15 +250,14 @@ void migrate_decompress_threads_join(void); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_transferred(void); uint64_t ram_bytes_total(void); +uint64_t ram_dirty_sync_count(void); +uint64_t ram_dirty_pages_rate(void); +uint64_t ram_postcopy_requests(void); void free_xbzrle_decoded_buf(void); void acct_update_position(QEMUFile *f, size_t size, bool zero); -uint64_t dup_mig_bytes_transferred(void); uint64_t dup_mig_pages_transferred(void); -uint64_t 
skipped_mig_bytes_transferred(void); -uint64_t skipped_mig_pages_transferred(void); -uint64_t norm_mig_bytes_transferred(void); uint64_t norm_mig_pages_transferred(void); uint64_t xbzrle_mig_bytes_transferred(void); uint64_t xbzrle_mig_pages_transferred(void); @@ -293,8 +270,7 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected); /* For outgoing discard bitmap */ int ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ -int ram_discard_range(MigrationIncomingState *mis, const char *block_name, - uint64_t start, size_t length); +int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); void ram_postcopy_migrated_memory_release(MigrationState *ms); @@ -377,9 +353,8 @@ void savevm_skip_configuration(void); int global_state_store(void); void global_state_store_running(void); -void flush_page_queue(MigrationState *ms); -int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len); +void migration_page_queue_free(void); +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); uint64_t ram_pagesize_summary(void); PostcopyState postcopy_state_get(void); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index f2dbf8410a..dad3984c07 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -56,7 +56,8 @@ typedef struct SaveVMHandlers { /* This runs outside the iothread lock! 
*/ int (*save_live_setup)(QEMUFile *f, void *opaque); - void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size, + void (*save_live_pending)(QEMUFile *f, void *opaque, + uint64_t threshold_size, uint64_t *non_postcopiable_pending, uint64_t *postcopiable_pending); LoadStateHandler *load_state; diff --git a/include/net/eth.h b/include/net/eth.h index afeb45be34..09054a506d 100644 --- a/include/net/eth.h +++ b/include/net/eth.h @@ -209,6 +209,7 @@ struct tcp_hdr { #define ETH_P_IPV6 (0x86dd) #define ETH_P_VLAN (0x8100) #define ETH_P_DVLAN (0x88a8) +#define ETH_P_NCSI (0x88f8) #define ETH_P_UNKNOWN (0xffff) #define VLAN_VID_MASK 0x0fff #define IP_HEADER_VERSION_4 (4) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 63ea2d0b1e..c318da12d7 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -220,6 +220,8 @@ void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr); +void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src, + long nr); unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index e0ce9ffb28..18e610083a 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -24,17 +24,9 @@ #define QEMU_NORETURN __attribute__ ((__noreturn__)) -#if QEMU_GNUC_PREREQ(3, 4) #define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) -#else -#define QEMU_WARN_UNUSED_RESULT -#endif -#if QEMU_GNUC_PREREQ(4, 0) #define QEMU_SENTINEL __attribute__((sentinel)) -#else -#define QEMU_SENTINEL -#endif #if QEMU_GNUC_PREREQ(4, 3) #define QEMU_ARTIFICIAL __attribute__((always_inline, artificial)) diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index e60beaff81..a4509bd977 100644 --- 
a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -77,6 +77,11 @@ void qemu_coroutine_enter(Coroutine *coroutine); void qemu_coroutine_enter_if_inactive(Coroutine *co); /** + * Transfer control to a coroutine and associate it with ctx + */ +void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co); + +/** * Transfer control back to a coroutine's caller * * This function does not return until the coroutine is re-entered using diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index a38be42253..95cf4f4163 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -115,37 +115,7 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) */ static inline int clz32(uint32_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return val ? __builtin_clz(val) : 32; -#else - /* Binary search for the leading one bit. */ - int cnt = 0; - - if (!(val & 0xFFFF0000U)) { - cnt += 16; - val <<= 16; - } - if (!(val & 0xFF000000U)) { - cnt += 8; - val <<= 8; - } - if (!(val & 0xF0000000U)) { - cnt += 4; - val <<= 4; - } - if (!(val & 0xC0000000U)) { - cnt += 2; - val <<= 2; - } - if (!(val & 0x80000000U)) { - cnt++; - val <<= 1; - } - if (!(val & 0x80000000U)) { - cnt++; - } - return cnt; -#endif } /** @@ -168,19 +138,7 @@ static inline int clo32(uint32_t val) */ static inline int clz64(uint64_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return val ? __builtin_clzll(val) : 64; -#else - int cnt = 0; - - if (!(val >> 32)) { - cnt += 32; - } else { - val >>= 32; - } - - return cnt + clz32(val); -#endif } /** @@ -203,39 +161,7 @@ static inline int clo64(uint64_t val) */ static inline int ctz32(uint32_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return val ? __builtin_ctz(val) : 32; -#else - /* Binary search for the trailing one bit. 
*/ - int cnt; - - cnt = 0; - if (!(val & 0x0000FFFFUL)) { - cnt += 16; - val >>= 16; - } - if (!(val & 0x000000FFUL)) { - cnt += 8; - val >>= 8; - } - if (!(val & 0x0000000FUL)) { - cnt += 4; - val >>= 4; - } - if (!(val & 0x00000003UL)) { - cnt += 2; - val >>= 2; - } - if (!(val & 0x00000001UL)) { - cnt++; - val >>= 1; - } - if (!(val & 0x00000001UL)) { - cnt++; - } - - return cnt; -#endif } /** @@ -258,19 +184,7 @@ static inline int cto32(uint32_t val) */ static inline int ctz64(uint64_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return val ? __builtin_ctzll(val) : 64; -#else - int cnt; - - cnt = 0; - if (!((uint32_t)val)) { - cnt += 32; - val >>= 32; - } - - return cnt + ctz32(val); -#endif } /** @@ -322,15 +236,7 @@ static inline int clrsb64(uint64_t val) */ static inline int ctpop8(uint8_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return __builtin_popcount(val); -#else - val = (val & 0x55) + ((val >> 1) & 0x55); - val = (val & 0x33) + ((val >> 2) & 0x33); - val = (val + (val >> 4)) & 0x0f; - - return val; -#endif } /** @@ -339,16 +245,7 @@ static inline int ctpop8(uint8_t val) */ static inline int ctpop16(uint16_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return __builtin_popcount(val); -#else - val = (val & 0x5555) + ((val >> 1) & 0x5555); - val = (val & 0x3333) + ((val >> 2) & 0x3333); - val = (val + (val >> 4)) & 0x0f0f; - val = (val + (val >> 8)) & 0x00ff; - - return val; -#endif } /** @@ -357,16 +254,7 @@ static inline int ctpop16(uint16_t val) */ static inline int ctpop32(uint32_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return __builtin_popcount(val); -#else - val = (val & 0x55555555) + ((val >> 1) & 0x55555555); - val = (val & 0x33333333) + ((val >> 2) & 0x33333333); - val = (val + (val >> 4)) & 0x0f0f0f0f; - val = (val * 0x01010101) >> 24; - - return val; -#endif } /** @@ -375,16 +263,7 @@ static inline int ctpop32(uint32_t val) */ static inline int ctpop64(uint64_t val) { -#if QEMU_GNUC_PREREQ(3, 4) return __builtin_popcountll(val); -#else - val = (val & 0x5555555555555555ULL) 
+ ((val >> 1) & 0x5555555555555555ULL); - val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL); - val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL; - val = (val * 0x0101010101010101ULL) >> 56; - - return val; -#endif } /** diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 5f1bab9b3e..af285321b8 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -36,8 +36,9 @@ int inet_ai_family_from_address(InetSocketAddress *addr, Error **errp); InetSocketAddress *inet_parse(const char *str, Error **errp); int inet_connect(const char *str, Error **errp); -int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque); +int inet_connect_saddr(InetSocketAddress *saddr, + NonBlockingConnectHandler *callback, void *opaque, + Error **errp); NetworkAddressFamily inet_netfamily(int family); @@ -45,8 +46,8 @@ int unix_listen(const char *path, char *ostr, int olen, Error **errp); int unix_connect(const char *path, Error **errp); SocketAddress *socket_parse(const char *str, Error **errp); -int socket_connect(SocketAddress *addr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque); +int socket_connect(SocketAddress *addr, NonBlockingConnectHandler *callback, + void *opaque, Error **errp); int socket_listen(SocketAddress *addr, Error **errp); void socket_listen_cleanup(int fd, Error **errp); int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); @@ -119,4 +120,15 @@ SocketAddress *socket_remote_address(int fd, Error **errp); */ char *socket_address_to_string(struct SocketAddress *addr, Error **errp); +/** + * socket_address_crumple: + * @addr_flat: the socket address to crumple + * + * Convert SocketAddressFlat to SocketAddress. Caller is responsible + * for freeing with qapi_free_SocketAddress(). + * + * Returns: the argument converted to SocketAddress. 
+ */ +SocketAddress *socket_address_crumple(SocketAddressFlat *addr_flat); + #endif /* QEMU_SOCKETS_H */ diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index 5fb6541ae9..4c4a261cf4 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -4,8 +4,7 @@ #include <windows.h> struct QemuMutex { - CRITICAL_SECTION lock; - LONG owner; + SRWLOCK lock; }; typedef struct QemuRecMutex QemuRecMutex; @@ -19,9 +18,7 @@ int qemu_rec_mutex_trylock(QemuRecMutex *mutex); void qemu_rec_mutex_unlock(QemuRecMutex *mutex); struct QemuCond { - LONG waiters, target; - HANDLE sema; - HANDLE continue_event; + CONDITION_VARIABLE var; }; struct QemuSemaphore { diff --git a/include/qemu/timer.h b/include/qemu/timer.h index e1742f2f3d..8a1eb74839 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -869,6 +869,7 @@ int64_t cpu_get_icount_raw(void); int64_t cpu_get_icount(void); int64_t cpu_get_clock(void); int64_t cpu_icount_to_ns(int64_t icount); +void cpu_update_icount(CPUState *cpu); /*******************************************/ /* host CPU ticks (if available) */ diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index e95f28cfec..f08d327aec 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -23,6 +23,7 @@ typedef struct CPUAddressSpace CPUAddressSpace; typedef struct CPUState CPUState; typedef struct DeviceListener DeviceListener; typedef struct DeviceState DeviceState; +typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot; typedef struct DisplayChangeListener DisplayChangeListener; typedef struct DisplayState DisplayState; typedef struct DisplaySurface DisplaySurface; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index c3292efe1c..5d10359c8f 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -332,6 +332,7 @@ struct CPUState { /* updates protected by BQL */ uint32_t interrupt_request; int singlestep_enabled; + int64_t icount_budget; int64_t icount_extra; sigjmp_buf jmp_env; diff --git 
a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 096c17fce0..7462228ac1 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -58,6 +58,14 @@ typedef struct BlockDevOps { * Runs when the size changed (e.g. monitor command block_resize) */ void (*resize_cb)(void *opaque); + /* + * Runs when the backend receives a drain request. + */ + void (*drained_begin)(void *opaque); + /* + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); } BlockDevOps; /* This struct is embedded in (the private) BlockBackend struct and contains diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index ecae0cff19..ed6a437f4d 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -62,6 +62,7 @@ struct HostMemoryBackend { MemoryRegion mr; }; +bool host_memory_backend_mr_inited(HostMemoryBackend *backend); MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 576c7ce640..16175f7295 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -67,7 +67,7 @@ int qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); void qemu_system_reset(bool report); void qemu_system_guest_panicked(GuestPanicInformation *info); -size_t qemu_target_page_bits(void); +size_t qemu_target_page_size(void); void qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); diff --git a/io/channel-socket.c b/io/channel-socket.c index f546c6830e..53386b7ba3 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -140,7 +140,7 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, int fd; trace_qio_channel_socket_connect_sync(ioc, addr); - fd = socket_connect(addr, errp, NULL, NULL); + fd = socket_connect(addr, NULL, NULL, errp); if (fd < 0) { trace_qio_channel_socket_connect_fail(ioc); return -1; @@ -331,16 +331,10 @@ 
qio_channel_socket_accept(QIOChannelSocket *ioc, { QIOChannelSocket *cioc; - cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); - cioc->fd = -1; + cioc = qio_channel_socket_new(); cioc->remoteAddrLen = sizeof(ioc->remoteAddr); cioc->localAddrLen = sizeof(ioc->localAddr); -#ifdef WIN32 - QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL); -#endif - - retry: trace_qio_channel_socket_accept(ioc); cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr, diff --git a/io/dns-resolver.c b/io/dns-resolver.c index 0ac6b23c02..759d1b40d7 100644 --- a/io/dns-resolver.c +++ b/io/dns-resolver.c @@ -158,6 +158,7 @@ int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver, case SOCKET_ADDRESS_KIND_UNIX: case SOCKET_ADDRESS_KIND_VSOCK: + case SOCKET_ADDRESS_KIND_FD: return qio_dns_resolver_lookup_sync_nop(resolver, addr, naddrs, @@ -165,8 +166,7 @@ int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver, errp); default: - error_setg(errp, "Unknown socket address kind"); - return -1; + abort(); } } @@ -906,12 +906,6 @@ void memory_region_transaction_begin(void) ++memory_region_transaction_depth; } -static void memory_region_clear_pending(void) -{ - memory_region_update_pending = false; - ioeventfd_update_pending = false; -} - void memory_region_transaction_commit(void) { AddressSpace *as; @@ -927,14 +921,14 @@ void memory_region_transaction_commit(void) QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { address_space_update_topology(as); } - + memory_region_update_pending = false; MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); } else if (ioeventfd_update_pending) { QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { address_space_update_ioeventfds(as); } + ioeventfd_update_pending = false; } - memory_region_clear_pending(); } } @@ -1589,7 +1583,7 @@ static void memory_region_update_iommu_notify_flags(MemoryRegion *mr) IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; IOMMUNotifier *iommu_notifier; - QLIST_FOREACH(iommu_notifier, 
&mr->iommu_notify, node) { + IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) { flags |= iommu_notifier->notifier_flags; } @@ -1612,6 +1606,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, /* We need to register for at least one bitfield */ assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); + assert(n->start <= n->end); QLIST_INSERT_HEAD(&mr->iommu_notify, n, node); memory_region_update_iommu_notify_flags(mr); } @@ -1631,6 +1626,12 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, hwaddr addr, granularity; IOMMUTLBEntry iotlb; + /* If the IOMMU has its own replay callback, override */ + if (mr->iommu_ops->replay) { + mr->iommu_ops->replay(mr, n); + return; + } + granularity = memory_region_iommu_get_min_page_size(mr); for (addr = 0; addr < memory_region_size(mr); addr += granularity) { @@ -1647,6 +1648,15 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, } } +void memory_region_iommu_replay_all(MemoryRegion *mr) +{ + IOMMUNotifier *notifier; + + IOMMU_NOTIFIER_FOREACH(notifier, mr) { + memory_region_iommu_replay(mr, notifier, false); + } +} + void memory_region_unregister_iommu_notifier(MemoryRegion *mr, IOMMUNotifier *n) { @@ -1658,24 +1668,40 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr, memory_region_update_iommu_notify_flags(mr); } -void memory_region_notify_iommu(MemoryRegion *mr, - IOMMUTLBEntry entry) +void memory_region_notify_one(IOMMUNotifier *notifier, + IOMMUTLBEntry *entry) { - IOMMUNotifier *iommu_notifier; IOMMUNotifierFlag request_flags; - assert(memory_region_is_iommu(mr)); + /* + * Skip the notification if the notification does not overlap + * with registered range. 
+ */ + if (notifier->start > entry->iova + entry->addr_mask + 1 || + notifier->end < entry->iova) { + return; + } - if (entry.perm & IOMMU_RW) { + if (entry->perm & IOMMU_RW) { request_flags = IOMMU_NOTIFIER_MAP; } else { request_flags = IOMMU_NOTIFIER_UNMAP; } - QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) { - if (iommu_notifier->notifier_flags & request_flags) { - iommu_notifier->notify(iommu_notifier, &entry); - } + if (notifier->notifier_flags & request_flags) { + notifier->notify(notifier, entry); + } +} + +void memory_region_notify_iommu(MemoryRegion *mr, + IOMMUTLBEntry entry) +{ + IOMMUNotifier *iommu_notifier; + + assert(memory_region_is_iommu(mr)); + + IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) { + memory_region_notify_one(iommu_notifier, &entry); } } @@ -1722,6 +1748,23 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, memory_region_get_ram_addr(mr) + addr, size, client); } +DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr, + hwaddr addr, + hwaddr size, + unsigned client) +{ + assert(mr->ram_block); + return cpu_physical_memory_snapshot_and_clear_dirty( + memory_region_get_ram_addr(mr) + addr, size, client); +} + +bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap, + hwaddr addr, hwaddr size) +{ + assert(mr->ram_block); + return cpu_physical_memory_snapshot_get_dirty(snap, + memory_region_get_ram_addr(mr) + addr, size); +} void memory_region_sync_dirty_bitmap(MemoryRegion *mr) { diff --git a/migration/block.c b/migration/block.c index 7734ff728a..060087fa32 100644 --- a/migration/block.c +++ b/migration/block.c @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) int64_t total_sectors = 0; int nr_sectors; int ret; + BlockDriverInfo bdi; + int cluster_size = BLOCK_SIZE; do { addr = qemu_get_be64(f); @@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); return -EINVAL; } + 
+ ret = bdrv_get_info(blk_bs(blk), &bdi); + if (ret == 0 && bdi.cluster_size > 0 && + bdi.cluster_size <= BLOCK_SIZE && + BLOCK_SIZE % bdi.cluster_size == 0) { + cluster_size = bdi.cluster_size; + } else { + cluster_size = BLOCK_SIZE; + } } if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) { @@ -932,10 +943,30 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) nr_sectors * BDRV_SECTOR_SIZE, BDRV_REQ_MAY_UNMAP); } else { + int i; + int64_t cur_addr; + uint8_t *cur_buf; + buf = g_malloc(BLOCK_SIZE); qemu_get_buffer(f, buf, BLOCK_SIZE); - ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf, - nr_sectors * BDRV_SECTOR_SIZE, 0); + for (i = 0; i < BLOCK_SIZE / cluster_size; i++) { + cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size; + cur_buf = buf + i * cluster_size; + + if ((!block_mig_state.zero_blocks || + cluster_size < BLOCK_SIZE) && + buffer_is_zero(cur_buf, cluster_size)) { + ret = blk_pwrite_zeroes(blk, cur_addr, + cluster_size, + BDRV_REQ_MAY_UNMAP); + } else { + ret = blk_pwrite(blk, cur_addr, cur_buf, + cluster_size, 0); + } + if (ret < 0) { + break; + } + } g_free(buf); } diff --git a/migration/migration.c b/migration/migration.c index 54060f749a..353f2728cf 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -109,7 +109,6 @@ MigrationState *migrate_get_current(void) }; if (!once) { - qemu_mutex_init(&current_migration.src_page_req_mutex); current_migration.parameters.tls_creds = g_strdup(""); current_migration.parameters.tls_hostname = g_strdup(""); once = true; @@ -349,6 +348,14 @@ static void process_incoming_migration_bh(void *opaque) exit(EXIT_FAILURE); } + /* If we get an error here, just don't restart the VM yet. */ + blk_resume_after_migration(&local_err); + if (local_err) { + error_free(local_err); + local_err = NULL; + autostart = false; + } + /* * This must happen after all error conditions are dealt with and * we're sure the VM is going to be running on this host. 
@@ -428,9 +435,6 @@ static void process_incoming_migration_co(void *opaque) qemu_thread_join(&mis->colo_incoming_thread); } - qemu_fclose(f); - free_xbzrle_decoded_buf(); - if (ret < 0) { migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); @@ -439,6 +443,9 @@ static void process_incoming_migration_co(void *opaque) exit(EXIT_FAILURE); } + qemu_fclose(f); + free_xbzrle_decoded_buf(); + mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); qemu_bh_schedule(mis->bh); } @@ -643,16 +650,19 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->transferred = ram_bytes_transferred(); info->ram->total = ram_bytes_total(); info->ram->duplicate = dup_mig_pages_transferred(); - info->ram->skipped = skipped_mig_pages_transferred(); + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; info->ram->normal = norm_mig_pages_transferred(); - info->ram->normal_bytes = norm_mig_bytes_transferred(); + info->ram->normal_bytes = norm_mig_pages_transferred() * + qemu_target_page_size(); info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = s->dirty_sync_count; - info->ram->postcopy_requests = s->postcopy_requests; + info->ram->dirty_sync_count = ram_dirty_sync_count(); + info->ram->postcopy_requests = ram_postcopy_requests(); + info->ram->page_size = qemu_target_page_size(); if (s->state != MIGRATION_STATUS_COMPLETED) { info->ram->remaining = ram_bytes_remaining(); - info->ram->dirty_pages_rate = s->dirty_pages_rate; + info->ram->dirty_pages_rate = ram_dirty_pages_rate(); } } @@ -947,7 +957,7 @@ static void migrate_fd_cleanup(void *opaque) qemu_bh_delete(s->cleanup_bh); s->cleanup_bh = NULL; - flush_page_queue(s); + migration_page_queue_free(); if (s->to_dst_file) { trace_migrate_fd_cleanup(); @@ -1053,21 +1063,21 @@ bool migration_has_failed(MigrationState *s) s->state == MIGRATION_STATUS_FAILED); } -bool migration_in_postcopy(MigrationState *s) +bool migration_in_postcopy(void) { + MigrationState *s = 
migrate_get_current(); + return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); } bool migration_in_postcopy_after_devices(MigrationState *s) { - return migration_in_postcopy(s) && s->postcopy_after_devices; + return migration_in_postcopy() && s->postcopy_after_devices; } -bool migration_is_idle(MigrationState *s) +bool migration_is_idle(void) { - if (!s) { - s = migrate_get_current(); - } + MigrationState *s = migrate_get_current(); switch (s->state) { case MIGRATION_STATUS_NONE: @@ -1108,22 +1118,15 @@ MigrationState *migrate_init(const MigrationParams *params) s->mbps = 0.0; s->downtime = 0; s->expected_downtime = 0; - s->dirty_pages_rate = 0; - s->dirty_bytes_rate = 0; s->setup_time = 0; - s->dirty_sync_count = 0; s->start_postcopy = false; s->postcopy_after_devices = false; - s->postcopy_requests = 0; s->migration_thread_running = false; - s->last_req_rb = NULL; error_free(s->error); s->error = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); - QSIMPLEQ_INIT(&s->src_page_requests); - s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); return s; } @@ -1139,7 +1142,7 @@ int migrate_add_blocker(Error *reason, Error **errp) return -EACCES; } - if (migration_is_idle(NULL)) { + if (migration_is_idle()) { migration_blockers = g_slist_prepend(migration_blockers, reason); return 0; } @@ -1477,7 +1480,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, return; } - if (ram_save_queue_pages(ms, rbname, start, len)) { + if (ram_save_queue_pages(rbname, start, len)) { mark_source_rp_bad(ms); } } @@ -1907,7 +1910,12 @@ static void *migration_thread(void *opaque) int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST); int64_t initial_bytes = 0; - int64_t max_size = 0; + /* + * The final stage happens when the remaining data is smaller than + * this threshold; it's calculated from the requested downtime and + * measured bandwidth + */ + 
int64_t threshold_size = 0; int64_t start_time = initial_time; int64_t end_time; bool old_vm_running = false; @@ -1938,7 +1946,6 @@ static void *migration_thread(void *opaque) qemu_savevm_state_begin(s->to_dst_file, &s->params); s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start; - current_active_state = MIGRATION_STATUS_ACTIVE; migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -1952,17 +1959,17 @@ static void *migration_thread(void *opaque) if (!qemu_file_rate_limit(s->to_dst_file)) { uint64_t pend_post, pend_nonpost; - qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_nonpost, - &pend_post); + qemu_savevm_state_pending(s->to_dst_file, threshold_size, + &pend_nonpost, &pend_post); pending_size = pend_nonpost + pend_post; - trace_migrate_pending(pending_size, max_size, + trace_migrate_pending(pending_size, threshold_size, pend_post, pend_nonpost); - if (pending_size && pending_size >= max_size) { + if (pending_size && pending_size >= threshold_size) { /* Still a significant amount to transfer */ if (migrate_postcopy_ram() && s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE && - pend_nonpost <= max_size && + pend_nonpost <= threshold_size && atomic_read(&s->start_postcopy)) { if (!postcopy_start(s, &old_vm_running)) { @@ -1994,17 +2001,18 @@ static void *migration_thread(void *opaque) initial_bytes; uint64_t time_spent = current_time - initial_time; double bandwidth = (double)transferred_bytes / time_spent; - max_size = bandwidth * s->parameters.downtime_limit; + threshold_size = bandwidth * s->parameters.downtime_limit; s->mbps = (((double) transferred_bytes * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; trace_migrate_transferred(transferred_bytes, time_spent, - bandwidth, max_size); + bandwidth, threshold_size); /* if we haven't sent anything, we don't want to recalculate 10000 is a small enough number for our purposes */ - if (s->dirty_bytes_rate && transferred_bytes > 10000) { - s->expected_downtime 
= s->dirty_bytes_rate / bandwidth; + if (ram_dirty_pages_rate() && transferred_bytes > 10000) { + s->expected_downtime = ram_dirty_pages_rate() * + qemu_target_page_size() / bandwidth; } qemu_file_reset_rate_limit(s->to_dst_file); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index dc80dbb67f..85fd8d72b3 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -123,7 +123,7 @@ bool postcopy_ram_supported_by_host(void) struct uffdio_range range_struct; uint64_t feature_mask; - if ((1ul << qemu_target_page_bits()) > pagesize) { + if (qemu_target_page_size() > pagesize) { error_report("Target page size bigger than host page size"); goto out; } @@ -213,8 +213,6 @@ out: static int init_range(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque) { - MigrationIncomingState *mis = opaque; - trace_postcopy_init_range(block_name, host_addr, offset, length); /* @@ -223,7 +221,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. 
* (Precopy will just overwrite this data, so doesn't need the discard) */ - if (ram_discard_range(mis, block_name, 0, length)) { + if (ram_discard_range(block_name, 0, length)) { return -1; } @@ -271,7 +269,7 @@ static int cleanup_range(const char *block_name, void *host_addr, */ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) { - if (qemu_ram_foreach_block(init_range, mis)) { + if (qemu_ram_foreach_block(init_range, NULL)) { return -1; } @@ -745,10 +743,10 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms, void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds, unsigned long start, unsigned long length) { - size_t tp_bits = qemu_target_page_bits(); + size_t tp_size = qemu_target_page_size(); /* Convert to byte offsets within the RAM block */ - pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits; - pds->length_list[pds->cur_entry] = length << tp_bits; + pds->start_list[pds->cur_entry] = (start - pds->offset) * tp_size; + pds->length_list[pds->cur_entry] = length * tp_size; trace_postcopy_discard_send_range(pds->ramblock_name, start, length); pds->cur_entry++; pds->nsentwords++; diff --git a/migration/ram.c b/migration/ram.c index de1e0a3b18..f48664ec62 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -45,10 +45,6 @@ #include "qemu/rcu_queue.h" #include "migration/colo.h" -static int dirty_rate_high_cnt; - -static uint64_t bitmap_sync_count; - /***********************************************************/ /* ram save/restore */ @@ -96,11 +92,17 @@ static void XBZRLE_cache_unlock(void) qemu_mutex_unlock(&XBZRLE.lock); } -/* - * called from qmp_migrate_set_cache_size in main thread, possibly while - * a migration is in progress. - * A running migration maybe using the cache and might finish during this - * call, hence changes to the cache are protected by XBZRLE.lock(). 
+/** + * xbzrle_cache_resize: resize the xbzrle cache + * + * This function is called from qmp_migrate_set_cache_size in main + * thread, possibly while a migration is in progress. A running + * migration may be using the cache and might finish during this call, + * hence changes to the cache are protected by XBZRLE.lock(). + * + * Returns the new_size or negative in case of error. + * + * @new_size: new cache size */ int64_t xbzrle_cache_resize(int64_t new_size) { @@ -136,115 +138,171 @@ out: return ret; } -/* accounting for migration statistics */ -typedef struct AccountingInfo { - uint64_t dup_pages; - uint64_t skipped_pages; +struct RAMBitmap { + struct rcu_head rcu; + /* Main migration bitmap */ + unsigned long *bmap; + /* bitmap of pages that haven't been sent even once + * only maintained and used in postcopy at the moment + * where it's used to send the dirtymap at the start + * of the postcopy phase + */ + unsigned long *unsentmap; +}; +typedef struct RAMBitmap RAMBitmap; + +/* + * An outstanding page request, on the source, having been received + * and queued + */ +struct RAMSrcPageRequest { + RAMBlock *rb; + hwaddr offset; + hwaddr len; + + QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req; +}; + +/* State of RAM for migration */ +struct RAMState { + /* QEMUFile used for this migration */ + QEMUFile *f; + /* Last block that we have visited searching for dirty pages */ + RAMBlock *last_seen_block; + /* Last block from where we have sent data */ + RAMBlock *last_sent_block; + /* Last dirty target page we have sent */ + ram_addr_t last_page; + /* last ram version we have seen */ + uint32_t last_version; + /* We are in the first round */ + bool ram_bulk_stage; + /* How many times we have dirty too many pages */ + int dirty_rate_high_cnt; + /* How many times we have synchronized the bitmap */ + uint64_t bitmap_sync_count; + /* these variables are used for bitmap sync */ + /* last time we did a full bitmap_sync */ + int64_t time_last_bitmap_sync; + /* bytes 
transferred at start_time */ + uint64_t bytes_xfer_prev; + /* number of dirty pages since start_time */ + uint64_t num_dirty_pages_period; + /* xbzrle misses since the beginning of the period */ + uint64_t xbzrle_cache_miss_prev; + /* number of iterations at the beginning of period */ + uint64_t iterations_prev; + /* Accounting fields */ + /* number of zero pages. It used to be pages filled by the same char. */ + uint64_t zero_pages; + /* number of normal transferred pages */ uint64_t norm_pages; + /* Iterations since start */ uint64_t iterations; + /* xbzrle transmitted bytes. Notice that this is with + * compression, they can't be calculated from the pages */ uint64_t xbzrle_bytes; + /* xbzrle transmmited pages */ uint64_t xbzrle_pages; + /* xbzrle number of cache miss */ uint64_t xbzrle_cache_miss; + /* xbzrle miss rate */ double xbzrle_cache_miss_rate; + /* xbzrle number of overflows */ uint64_t xbzrle_overflows; -} AccountingInfo; + /* number of dirty bits in the bitmap */ + uint64_t migration_dirty_pages; + /* total number of bytes transferred */ + uint64_t bytes_transferred; + /* number of dirtied pages in the last second */ + uint64_t dirty_pages_rate; + /* Count of requests incoming from destination */ + uint64_t postcopy_requests; + /* protects modification of the bitmap */ + QemuMutex bitmap_mutex; + /* Ram Bitmap protected by RCU */ + RAMBitmap *ram_bitmap; + /* The RAMBlock used in the last src_page_requests */ + RAMBlock *last_req_rb; + /* Queue of outstanding page requests from the destination */ + QemuMutex src_page_req_mutex; + QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests; +}; +typedef struct RAMState RAMState; -static AccountingInfo acct_info; +static RAMState ram_state; -static void acct_clear(void) +uint64_t dup_mig_pages_transferred(void) { - memset(&acct_info, 0, sizeof(acct_info)); + return ram_state.zero_pages; } -uint64_t dup_mig_bytes_transferred(void) +uint64_t norm_mig_pages_transferred(void) { - return 
acct_info.dup_pages * TARGET_PAGE_SIZE; + return ram_state.norm_pages; } -uint64_t dup_mig_pages_transferred(void) +uint64_t xbzrle_mig_bytes_transferred(void) { - return acct_info.dup_pages; + return ram_state.xbzrle_bytes; } -uint64_t skipped_mig_bytes_transferred(void) +uint64_t xbzrle_mig_pages_transferred(void) { - return acct_info.skipped_pages * TARGET_PAGE_SIZE; + return ram_state.xbzrle_pages; } -uint64_t skipped_mig_pages_transferred(void) +uint64_t xbzrle_mig_pages_cache_miss(void) { - return acct_info.skipped_pages; + return ram_state.xbzrle_cache_miss; } -uint64_t norm_mig_bytes_transferred(void) +double xbzrle_mig_cache_miss_rate(void) { - return acct_info.norm_pages * TARGET_PAGE_SIZE; + return ram_state.xbzrle_cache_miss_rate; } -uint64_t norm_mig_pages_transferred(void) +uint64_t xbzrle_mig_pages_overflow(void) { - return acct_info.norm_pages; + return ram_state.xbzrle_overflows; } -uint64_t xbzrle_mig_bytes_transferred(void) +uint64_t ram_bytes_transferred(void) { - return acct_info.xbzrle_bytes; + return ram_state.bytes_transferred; } -uint64_t xbzrle_mig_pages_transferred(void) +uint64_t ram_bytes_remaining(void) { - return acct_info.xbzrle_pages; + return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE; } -uint64_t xbzrle_mig_pages_cache_miss(void) +uint64_t ram_dirty_sync_count(void) { - return acct_info.xbzrle_cache_miss; + return ram_state.bitmap_sync_count; } -double xbzrle_mig_cache_miss_rate(void) +uint64_t ram_dirty_pages_rate(void) { - return acct_info.xbzrle_cache_miss_rate; + return ram_state.dirty_pages_rate; } -uint64_t xbzrle_mig_pages_overflow(void) +uint64_t ram_postcopy_requests(void) { - return acct_info.xbzrle_overflows; + return ram_state.postcopy_requests; } -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static QemuMutex 
migration_bitmap_mutex; -static uint64_t migration_dirty_pages; -static uint32_t last_version; -static bool ram_bulk_stage; - /* used by the search for pages to send */ struct PageSearchStatus { /* Current block being searched */ RAMBlock *block; - /* Current offset to search from */ - ram_addr_t offset; + /* Current page to search from */ + unsigned long page; /* Set once we wrap around */ bool complete_round; }; typedef struct PageSearchStatus PageSearchStatus; -static struct BitmapRcu { - struct rcu_head rcu; - /* Main migration bitmap */ - unsigned long *bmap; - /* bitmap of pages that haven't been sent even once - * only maintained and used in postcopy at the moment - * where it's used to send the dirtymap at the start - * of the postcopy phase - */ - unsigned long *unsentmap; -} *migration_bitmap_rcu; - struct CompressParam { bool done; bool quit; @@ -278,7 +336,6 @@ static QemuCond comp_done_cond; /* The empty QEMUFileOps will be used by file in CompressParam */ static const QEMUFileOps empty_ops = { }; -static bool compression_switch; static DecompressParam *decomp_param; static QemuThread *decompress_threads; static QemuMutex decomp_done_lock; @@ -323,6 +380,7 @@ static inline void terminate_compression_threads(void) int idx, thread_count; thread_count = migrate_compress_threads(); + for (idx = 0; idx < thread_count; idx++) { qemu_mutex_lock(&comp_param[idx].mutex); comp_param[idx].quit = true; @@ -361,7 +419,6 @@ void migrate_compress_threads_create(void) if (!migrate_use_compression()) { return; } - compression_switch = true; thread_count = migrate_compress_threads(); compress_threads = g_new0(QemuThread, thread_count); comp_param = g_new0(CompressParam, thread_count); @@ -383,38 +440,45 @@ void migrate_compress_threads_create(void) } /** - * save_page_header: Write page header to wire + * save_page_header: write page header to wire * * If this is the 1st block, it also writes the block identification * - * Returns: Number of bytes written + * Returns 
the number of bytes written * * @f: QEMUFile where to send the data * @block: block that contains the page we want to send * @offset: offset inside the block for the page * in the lower bits, it contains flags */ -static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) +static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset) { size_t size, len; - qemu_put_be64(f, offset); + if (block == rs->last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + qemu_put_be64(rs->f, offset); size = 8; if (!(offset & RAM_SAVE_FLAG_CONTINUE)) { len = strlen(block->idstr); - qemu_put_byte(f, len); - qemu_put_buffer(f, (uint8_t *)block->idstr, len); + qemu_put_byte(rs->f, len); + qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len); size += 1 + len; + rs->last_sent_block = block; } return size; } -/* Reduce amount of guest cpu execution to hopefully slow down memory writes. - * If guest dirty memory rate is reduced below the rate at which we can - * transfer pages to the destination then we should be able to complete - * migration. Some workloads dirty memory way too fast and will not effectively - * converge, even with auto-converge. +/** + * mig_throttle_guest_down: throotle down the guest + * + * Reduce amount of guest cpu execution to hopefully slow down memory + * writes. If guest dirty memory rate is reduced below the rate at + * which we can transfer pages to the destination then we should be + * able to complete migration. Some workloads dirty memory way too + * fast and will not effectively converge, even with auto-converge. */ static void mig_throttle_guest_down(void) { @@ -431,22 +495,28 @@ static void mig_throttle_guest_down(void) } } -/* Update the xbzrle cache to reflect a page that's been sent as all 0. 
+/** + * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache + * + * @rs: current RAM state + * @current_addr: address for the zero page + * + * Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. * As a bonus, if the page wasn't in the cache it gets added so that - * when a small write is made into the 0'd page it gets XBZRLE sent + * when a small write is made into the 0'd page it gets XBZRLE sent. */ -static void xbzrle_cache_zero_page(ram_addr_t current_addr) +static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) { - if (ram_bulk_stage || !migrate_use_xbzrle()) { + if (rs->ram_bulk_stage || !migrate_use_xbzrle()) { return; } /* We don't care if this fails to allocate a new cache page * as long as it updated an old one */ cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE, - bitmap_sync_count); + rs->bitmap_sync_count); } #define ENCODING_FLAG_XBZRLE 0x1 @@ -458,27 +528,25 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) * 0 means that page is identical to the one already sent * -1 means that xbzrle would be longer than normal * - * @f: QEMUFile where to send the data - * @current_data: - * @current_addr: + * @rs: current RAM state + * @current_data: pointer to the address of the page contents + * @current_addr: addr of the page * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, +static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, bool last_stage, - uint64_t *bytes_transferred) + ram_addr_t offset, bool last_stage) { int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; - if 
(!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { - acct_info.xbzrle_cache_miss++; + if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) { + rs->xbzrle_cache_miss++; if (!last_stage) { if (cache_insert(XBZRLE.cache, current_addr, *current_data, - bitmap_sync_count) == -1) { + rs->bitmap_sync_count) == -1) { return -1; } else { /* update *current_data when the page has been @@ -503,7 +571,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, return 0; } else if (encoded_len == -1) { trace_save_xbzrle_page_overflow(); - acct_info.xbzrle_overflows++; + rs->xbzrle_overflows++; /* update data in the cache */ if (!last_stage) { memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); @@ -518,92 +586,86 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); - qemu_put_byte(f, ENCODING_FLAG_XBZRLE); - qemu_put_be16(f, encoded_len); - qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); + bytes_xbzrle = save_page_header(rs, block, + offset | RAM_SAVE_FLAG_XBZRLE); + qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE); + qemu_put_be16(rs->f, encoded_len); + qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len); bytes_xbzrle += encoded_len + 1 + 2; - acct_info.xbzrle_pages++; - acct_info.xbzrle_bytes += bytes_xbzrle; - *bytes_transferred += bytes_xbzrle; + rs->xbzrle_pages++; + rs->xbzrle_bytes += bytes_xbzrle; + rs->bytes_transferred += bytes_xbzrle; return 1; } -/* Called with rcu_read_lock() to protect migration_bitmap - * rb: The RAMBlock to search for dirty pages in - * start: Start address (typically so we can continue from previous page) - * ram_addr_abs: Pointer into which to store the address of the dirty page - * within the global ram_addr space +/** + * migration_bitmap_find_dirty: find the next dirty page from start + * + * Called with rcu_read_lock() to protect migration_bitmap * - * 
Returns: byte offset within memory region of the start of a dirty page + * Returns the byte offset within memory region of the start of a dirty page + * + * @rs: current RAM state + * @rb: RAMBlock where to search for dirty pages + * @start: page where we start the search */ static inline -ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb, - ram_addr_t start, - ram_addr_t *ram_addr_abs) +unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, + unsigned long start) { unsigned long base = rb->offset >> TARGET_PAGE_BITS; - unsigned long nr = base + (start >> TARGET_PAGE_BITS); + unsigned long nr = base + start; uint64_t rb_size = rb->used_length; unsigned long size = base + (rb_size >> TARGET_PAGE_BITS); unsigned long *bitmap; unsigned long next; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - if (ram_bulk_stage && nr > base) { + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + if (rs->ram_bulk_stage && nr > base) { next = nr + 1; } else { next = find_next_bit(bitmap, size, nr); } - *ram_addr_abs = next << TARGET_PAGE_BITS; - return (next - base) << TARGET_PAGE_BITS; + return next - base; } -static inline bool migration_bitmap_clear_dirty(ram_addr_t addr) +static inline bool migration_bitmap_clear_dirty(RAMState *rs, + RAMBlock *rb, + unsigned long page) { bool ret; - int nr = addr >> TARGET_PAGE_BITS; - unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + unsigned long nr = (rb->offset >> TARGET_PAGE_BITS) + page; ret = test_and_clear_bit(nr, bitmap); if (ret) { - migration_dirty_pages--; + rs->migration_dirty_pages--; } return ret; } -static int64_t num_dirty_pages_period; -static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) +static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, + ram_addr_t start, ram_addr_t length) { unsigned long *bitmap; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - 
migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap, - start, length, &num_dirty_pages_period); -} - -/* Fix me: there are too many global variables used in migration process. */ -static int64_t start_time; -static int64_t bytes_xfer_prev; -static uint64_t xbzrle_cache_miss_prev; -static uint64_t iterations_prev; - -static void migration_bitmap_sync_init(void) -{ - start_time = 0; - bytes_xfer_prev = 0; - num_dirty_pages_period = 0; - xbzrle_cache_miss_prev = 0; - iterations_prev = 0; + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + rs->migration_dirty_pages += + cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length, + &rs->num_dirty_pages_period); } -/* Returns a summary bitmap of the page sizes of all RAMBlocks; - * for VMs with just normal pages this is equivalent to the - * host page size. If it's got some huge pages then it's the OR - * of all the different page sizes. +/** + * ram_pagesize_summary: calculate all the pagesizes of a VM + * + * Returns a summary bitmap of the page sizes of all RAMBlocks + * + * For VMs with just normal pages this is equivalent to the host page + * size. If it's got some huge pages then it's the OR of all the + * different page sizes. 
*/ uint64_t ram_pagesize_summary(void) { @@ -617,40 +679,39 @@ uint64_t ram_pagesize_summary(void) return summary; } -static void migration_bitmap_sync(void) +static void migration_bitmap_sync(RAMState *rs) { RAMBlock *block; - MigrationState *s = migrate_get_current(); int64_t end_time; - int64_t bytes_xfer_now; + uint64_t bytes_xfer_now; - bitmap_sync_count++; + rs->bitmap_sync_count++; - if (!bytes_xfer_prev) { - bytes_xfer_prev = ram_bytes_transferred(); + if (!rs->bytes_xfer_prev) { + rs->bytes_xfer_prev = ram_bytes_transferred(); } - if (!start_time) { - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if (!rs->time_last_bitmap_sync) { + rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); } trace_migration_bitmap_sync_start(); memory_global_dirty_log_sync(); - qemu_mutex_lock(&migration_bitmap_mutex); + qemu_mutex_lock(&rs->bitmap_mutex); rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->offset, block->used_length); + migration_bitmap_sync_range(rs, block, 0, block->used_length); } rcu_read_unlock(); - qemu_mutex_unlock(&migration_bitmap_mutex); + qemu_mutex_unlock(&rs->bitmap_mutex); - trace_migration_bitmap_sync_end(num_dirty_pages_period); + trace_migration_bitmap_sync_end(rs->num_dirty_pages_period); end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); /* more than 1 second = 1000 millisecons */ - if (end_time > start_time + 1000) { + if (end_time > rs->time_last_bitmap_sync + 1000) { if (migrate_auto_converge()) { /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. 
@@ -659,94 +720,87 @@ static void migration_bitmap_sync(void) throttling */ bytes_xfer_now = ram_bytes_transferred(); - if (s->dirty_pages_rate && - (num_dirty_pages_period * TARGET_PAGE_SIZE > - (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ >= 2)) { + if (rs->dirty_pages_rate && + (rs->num_dirty_pages_period * TARGET_PAGE_SIZE > + (bytes_xfer_now - rs->bytes_xfer_prev) / 2) && + (rs->dirty_rate_high_cnt++ >= 2)) { trace_migration_throttle(); - dirty_rate_high_cnt = 0; + rs->dirty_rate_high_cnt = 0; mig_throttle_guest_down(); } - bytes_xfer_prev = bytes_xfer_now; + rs->bytes_xfer_prev = bytes_xfer_now; } if (migrate_use_xbzrle()) { - if (iterations_prev != acct_info.iterations) { - acct_info.xbzrle_cache_miss_rate = - (double)(acct_info.xbzrle_cache_miss - - xbzrle_cache_miss_prev) / - (acct_info.iterations - iterations_prev); + if (rs->iterations_prev != rs->iterations) { + rs->xbzrle_cache_miss_rate = + (double)(rs->xbzrle_cache_miss - + rs->xbzrle_cache_miss_prev) / + (rs->iterations - rs->iterations_prev); } - iterations_prev = acct_info.iterations; - xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; + rs->iterations_prev = rs->iterations; + rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss; } - s->dirty_pages_rate = num_dirty_pages_period * 1000 - / (end_time - start_time); - s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; - start_time = end_time; - num_dirty_pages_period = 0; + rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000 + / (end_time - rs->time_last_bitmap_sync); + rs->time_last_bitmap_sync = end_time; + rs->num_dirty_pages_period = 0; } - s->dirty_sync_count = bitmap_sync_count; if (migrate_use_events()) { - qapi_event_send_migration_pass(bitmap_sync_count, NULL); + qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL); } } /** - * save_zero_page: Send the zero page to the stream + * save_zero_page: send the zero page to the stream * - * Returns: Number of pages written. 
+ * Returns the number of pages written. * - * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @p: pointer to the page - * @bytes_transferred: increase it with the number of transferred bytes */ -static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - uint8_t *p, uint64_t *bytes_transferred) +static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + uint8_t *p) { int pages = -1; if (is_zero_range(p, TARGET_PAGE_SIZE)) { - acct_info.dup_pages++; - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_COMPRESS); - qemu_put_byte(f, 0); - *bytes_transferred += 1; + rs->zero_pages++; + rs->bytes_transferred += + save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS); + qemu_put_byte(rs->f, 0); + rs->bytes_transferred += 1; pages = 1; } return pages; } -static void ram_release_pages(MigrationState *ms, const char *block_name, - uint64_t offset, int pages) +static void ram_release_pages(const char *rbname, uint64_t offset, int pages) { - if (!migrate_release_ram() || !migration_in_postcopy(ms)) { + if (!migrate_release_ram() || !migration_in_postcopy()) { return; } - ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS); + ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS); } /** - * ram_save_page: Send the given page to the stream + * ram_save_page: send the given page to the stream * - * Returns: Number of pages written. + * Returns the number of pages written. * < 0 - error * >=0 - Number of pages written - this might legally be 0 * if xbzrle noticed the page was the same. * - * @ms: The current migration state. 
- * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, - bool last_stage, uint64_t *bytes_transferred) +static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) { int pages = -1; uint64_t bytes_xmit; @@ -755,16 +809,16 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, int ret; bool send_async = true; RAMBlock *block = pss->block; - ram_addr_t offset = pss->offset; + ram_addr_t offset = pss->page << TARGET_PAGE_BITS; p = block->host + offset; /* In doubt sent page as normal */ bytes_xmit = 0; - ret = ram_control_save_page(f, block->offset, + ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, &bytes_xmit); if (bytes_xmit) { - *bytes_transferred += bytes_xmit; + rs->bytes_transferred += bytes_xmit; pages = 1; } @@ -772,29 +826,26 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, current_addr = block->offset + offset; - if (block == last_sent_block) { - offset |= RAM_SAVE_FLAG_CONTINUE; - } if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { if (bytes_xmit > 0) { - acct_info.norm_pages++; + rs->norm_pages++; } else if (bytes_xmit == 0) { - acct_info.dup_pages++; + rs->zero_pages++; } } } else { - pages = save_zero_page(f, block, offset, p, bytes_transferred); + pages = save_zero_page(rs, block, offset, p); if (pages > 0) { /* Must let xbzrle know, otherwise a previous (now 0'd) cached * page would be stale */ - xbzrle_cache_zero_page(current_addr); - ram_release_pages(ms, block->idstr, pss->offset, pages); - } else if (!ram_bulk_stage && - !migration_in_postcopy(ms) && migrate_use_xbzrle()) { - pages = 
save_xbzrle_page(f, &p, current_addr, block, - offset, last_stage, bytes_transferred); + xbzrle_cache_zero_page(rs, current_addr); + ram_release_pages(block->idstr, offset, pages); + } else if (!rs->ram_bulk_stage && + !migration_in_postcopy() && migrate_use_xbzrle()) { + pages = save_xbzrle_page(rs, &p, current_addr, block, + offset, last_stage); if (!last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire @@ -806,18 +857,18 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, /* XBZRLE overflow or normal page */ if (pages == -1) { - *bytes_transferred += save_page_header(f, block, - offset | RAM_SAVE_FLAG_PAGE); + rs->bytes_transferred += save_page_header(rs, block, + offset | RAM_SAVE_FLAG_PAGE); if (send_async) { - qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE, + qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE, migrate_release_ram() & - migration_in_postcopy(ms)); + migration_in_postcopy()); } else { - qemu_put_buffer(f, p, TARGET_PAGE_SIZE); + qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE); } - *bytes_transferred += TARGET_PAGE_SIZE; + rs->bytes_transferred += TARGET_PAGE_SIZE; pages = 1; - acct_info.norm_pages++; + rs->norm_pages++; } XBZRLE_cache_unlock(); @@ -828,10 +879,11 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss, static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) { + RAMState *rs = &ram_state; int bytes_sent, blen; uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); - bytes_sent = save_page_header(f, block, offset | + bytes_sent = save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, migrate_compress_level()); @@ -841,16 +893,13 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block, error_report("compressed data failed!"); } else { bytes_sent += blen; - ram_release_pages(migrate_get_current(), block->idstr, - 
offset & TARGET_PAGE_MASK, 1); + ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); } return bytes_sent; } -static uint64_t bytes_transferred; - -static void flush_compressed_data(QEMUFile *f) +static void flush_compressed_data(RAMState *rs) { int idx, len, thread_count; @@ -870,8 +919,8 @@ static void flush_compressed_data(QEMUFile *f) for (idx = 0; idx < thread_count; idx++) { qemu_mutex_lock(&comp_param[idx].mutex); if (!comp_param[idx].quit) { - len = qemu_put_qemu_file(f, comp_param[idx].file); - bytes_transferred += len; + len = qemu_put_qemu_file(rs->f, comp_param[idx].file); + rs->bytes_transferred += len; } qemu_mutex_unlock(&comp_param[idx].mutex); } @@ -884,9 +933,8 @@ static inline void set_compress_params(CompressParam *param, RAMBlock *block, param->offset = offset; } -static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, - ram_addr_t offset, - uint64_t *bytes_transferred) +static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block, + ram_addr_t offset) { int idx, thread_count, bytes_xmit = -1, pages = -1; @@ -896,14 +944,14 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, for (idx = 0; idx < thread_count; idx++) { if (comp_param[idx].done) { comp_param[idx].done = false; - bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file); + bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file); qemu_mutex_lock(&comp_param[idx].mutex); set_compress_params(&comp_param[idx], block, offset); qemu_cond_signal(&comp_param[idx].cond); qemu_mutex_unlock(&comp_param[idx].mutex); pages = 1; - acct_info.norm_pages++; - *bytes_transferred += bytes_xmit; + rs->norm_pages++; + rs->bytes_transferred += bytes_xmit; break; } } @@ -921,40 +969,37 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block, /** * ram_save_compressed_page: compress the given page and send it to the stream * - * Returns: Number of pages written. + * Returns the number of pages written. 
* - * @ms: The current migration state. - * @f: QEMUFile where to send the data + * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes */ -static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, bool last_stage, - uint64_t *bytes_transferred) +static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int pages = -1; uint64_t bytes_xmit = 0; uint8_t *p; int ret, blen; RAMBlock *block = pss->block; - ram_addr_t offset = pss->offset; + ram_addr_t offset = pss->page << TARGET_PAGE_BITS; p = block->host + offset; - ret = ram_control_save_page(f, block->offset, + ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE, &bytes_xmit); if (bytes_xmit) { - *bytes_transferred += bytes_xmit; + rs->bytes_transferred += bytes_xmit; pages = 1; } if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { if (bytes_xmit > 0) { - acct_info.norm_pages++; + rs->norm_pages++; } else if (bytes_xmit == 0) { - acct_info.dup_pages++; + rs->zero_pages++; } } } else { @@ -964,35 +1009,33 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, * out, keeping this order is important, because the 'cont' flag * is used to avoid resending the block name. 
*/ - if (block != last_sent_block) { - flush_compressed_data(f); - pages = save_zero_page(f, block, offset, p, bytes_transferred); + if (block != rs->last_sent_block) { + flush_compressed_data(rs); + pages = save_zero_page(rs, block, offset, p); if (pages == -1) { /* Make sure the first page is sent out before other pages */ - bytes_xmit = save_page_header(f, block, offset | + bytes_xmit = save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); - blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE, + blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE, migrate_compress_level()); if (blen > 0) { - *bytes_transferred += bytes_xmit + blen; - acct_info.norm_pages++; + rs->bytes_transferred += bytes_xmit + blen; + rs->norm_pages++; pages = 1; } else { - qemu_file_set_error(f, blen); + qemu_file_set_error(rs->f, blen); error_report("compressed data failed!"); } } if (pages > 0) { - ram_release_pages(ms, block->idstr, pss->offset, pages); + ram_release_pages(block->idstr, offset, pages); } } else { - offset |= RAM_SAVE_FLAG_CONTINUE; - pages = save_zero_page(f, block, offset, p, bytes_transferred); + pages = save_zero_page(rs, block, offset, p); if (pages == -1) { - pages = compress_page_with_multi_thread(f, block, offset, - bytes_transferred); + pages = compress_page_with_multi_thread(rs, block, offset); } else { - ram_release_pages(ms, block->idstr, pss->offset, pages); + ram_release_pages(block->idstr, offset, pages); } } } @@ -1000,25 +1043,21 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f, return pages; } -/* - * Find the next dirty page and update any state associated with - * the search process. +/** + * find_dirty_block: find the next dirty page and update any state + * associated with the search process. * - * Returns: True if a page is found + * Returns if a page is found * - * @f: Current migration stream. - * @pss: Data about the state of the current dirty page scan. 
- * @*again: Set to false if the search has scanned the whole of RAM - * *ram_addr_abs: Pointer into which to store the address of the dirty page - * within the global ram_addr space + * @rs: current RAM state + * @pss: data about the state of the current dirty page scan + * @again: set to false if the search has scanned the whole of RAM */ -static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, - bool *again, ram_addr_t *ram_addr_abs) +static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) { - pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset, - ram_addr_abs); - if (pss->complete_round && pss->block == last_seen_block && - pss->offset >= last_offset) { + pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); + if (pss->complete_round && pss->block == rs->last_seen_block && + pss->page >= rs->last_page) { /* * We've been once around the RAM and haven't found anything. * Give up. @@ -1026,22 +1065,21 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, *again = false; return false; } - if (pss->offset >= pss->block->used_length) { + if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) { /* Didn't find anything in this RAM Block */ - pss->offset = 0; + pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { /* Hit the end of the list */ pss->block = QLIST_FIRST_RCU(&ram_list.blocks); /* Flag that we've looped */ pss->complete_round = true; - ram_bulk_stage = false; + rs->ram_bulk_stage = false; if (migrate_use_xbzrle()) { /* If xbzrle is on, stop using the data compression at this * point. In theory, xbzrle can do better than compression. 
*/ - flush_compressed_data(f); - compression_switch = false; + flush_compressed_data(rs); } } /* Didn't find anything this time, but try again on the new block */ @@ -1055,61 +1093,59 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, } } -/* +/** + * unqueue_page: gets a page of the queue + * * Helper for 'get_queued_page' - gets a page off the queue - * ms: MigrationState in - * *offset: Used to return the offset within the RAMBlock - * ram_addr_abs: global offset in the dirty/sent bitmaps * - * Returns: block (or NULL if none available) + * Returns the block of the page (or NULL if none available) + * + * @rs: current RAM state + * @offset: used to return the offset within the RAMBlock */ -static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset, - ram_addr_t *ram_addr_abs) +static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) { RAMBlock *block = NULL; - qemu_mutex_lock(&ms->src_page_req_mutex); - if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) { - struct MigrationSrcPageRequest *entry = - QSIMPLEQ_FIRST(&ms->src_page_requests); + qemu_mutex_lock(&rs->src_page_req_mutex); + if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { + struct RAMSrcPageRequest *entry = + QSIMPLEQ_FIRST(&rs->src_page_requests); block = entry->rb; *offset = entry->offset; - *ram_addr_abs = (entry->offset + entry->rb->offset) & - TARGET_PAGE_MASK; if (entry->len > TARGET_PAGE_SIZE) { entry->len -= TARGET_PAGE_SIZE; entry->offset += TARGET_PAGE_SIZE; } else { memory_region_unref(block->mr); - QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(entry); } } - qemu_mutex_unlock(&ms->src_page_req_mutex); + qemu_mutex_unlock(&rs->src_page_req_mutex); return block; } -/* - * Unqueue a page from the queue fed by postcopy page requests; skips pages - * that are already sent (!dirty) +/** + * get_queued_page: unqueue a page from the postocpy requests + * + * Skips pages that are already sent (!dirty) 
* - * ms: MigrationState in - * pss: PageSearchStatus structure updated with found block/offset - * ram_addr_abs: global offset in the dirty/sent bitmaps + * Returns if a queued page is found * - * Returns: true if a queued page is found + * @rs: current RAM state + * @pss: data about the state of the current dirty page scan */ -static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, - ram_addr_t *ram_addr_abs) +static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) { RAMBlock *block; ram_addr_t offset; bool dirty; do { - block = unqueue_page(ms, &offset, ram_addr_abs); + block = unqueue_page(rs, &offset); /* * We're sending this page, and since it's postcopy nothing else * will dirty it, and we must make sure it doesn't get sent again @@ -1118,18 +1154,18 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, */ if (block) { unsigned long *bitmap; - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap); + unsigned long page; + + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + page = (block->offset + offset) >> TARGET_PAGE_BITS; + dirty = test_bit(page, bitmap); if (!dirty) { - trace_get_queued_page_not_dirty( - block->idstr, (uint64_t)offset, - (uint64_t)*ram_addr_abs, - test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, - atomic_rcu_read(&migration_bitmap_rcu)->unsentmap)); + trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, + page, + test_bit(page, + atomic_rcu_read(&rs->ram_bitmap)->unsentmap)); } else { - trace_get_queued_page(block->idstr, - (uint64_t)offset, - (uint64_t)*ram_addr_abs); + trace_get_queued_page(block->idstr, (uint64_t)offset, page); } } @@ -1142,7 +1178,7 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, * in (migration_bitmap_find_and_reset_dirty) that every page is * dirty, that's no longer true. 
*/ - ram_bulk_stage = false; + rs->ram_bulk_stage = false; /* * We want the background search to continue from the queued page @@ -1150,52 +1186,58 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss, * it just requested. */ pss->block = block; - pss->offset = offset; + pss->page = offset >> TARGET_PAGE_BITS; } return !!block; } /** - * flush_page_queue: Flush any remaining pages in the ram request queue - * it should be empty at the end anyway, but in error cases there may be - * some left. + * migration_page_queue_free: drop any remaining pages in the ram + * request queue + * + * It should be empty at the end anyway, but in error cases there may + * be some left. in case that there is any page left, we drop it. * - * ms: MigrationState */ -void flush_page_queue(MigrationState *ms) +void migration_page_queue_free(void) { - struct MigrationSrcPageRequest *mspr, *next_mspr; + struct RAMSrcPageRequest *mspr, *next_mspr; + RAMState *rs = &ram_state; /* This queue generally should be empty - but in the case of a failed * migration might have some droppings in. */ rcu_read_lock(); - QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) { + QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) { memory_region_unref(mspr->rb->mr); - QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); g_free(mspr); } rcu_read_unlock(); } /** - * Queue the pages for transmission, e.g. a request from postcopy destination - * ms: MigrationStatus in which the queue is held - * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last) - * start: Offset from the start of the RAMBlock - * len: Length (in bytes) to send - * Return: 0 on success + * ram_save_queue_pages: queue the page for transmission + * + * A request from postcopy destination for example. 
+ * + * Returns zero on success or negative on error + * + * @rbname: Name of the RAMBlock of the request. NULL means the + * same as the last one. + * @start: starting address from the start of the RAMBlock + * @len: length (in bytes) to send */ -int ram_save_queue_pages(MigrationState *ms, const char *rbname, - ram_addr_t start, ram_addr_t len) +int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) { RAMBlock *ramblock; + RAMState *rs = &ram_state; - ms->postcopy_requests++; + rs->postcopy_requests++; rcu_read_lock(); if (!rbname) { /* Reuse last RAMBlock */ - ramblock = ms->last_req_rb; + ramblock = rs->last_req_rb; if (!ramblock) { /* @@ -1213,7 +1255,7 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, error_report("ram_save_queue_pages no block '%s'", rbname); goto err; } - ms->last_req_rb = ramblock; + rs->last_req_rb = ramblock; } trace_ram_save_queue_pages(ramblock->idstr, start, len); if (start+len > ramblock->used_length) { @@ -1223,16 +1265,16 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname, goto err; } - struct MigrationSrcPageRequest *new_entry = - g_malloc0(sizeof(struct MigrationSrcPageRequest)); + struct RAMSrcPageRequest *new_entry = + g_malloc0(sizeof(struct RAMSrcPageRequest)); new_entry->rb = ramblock; new_entry->offset = start; new_entry->len = len; memory_region_ref(ramblock->mr); - qemu_mutex_lock(&ms->src_page_req_mutex); - QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req); - qemu_mutex_unlock(&ms->src_page_req_mutex); + qemu_mutex_lock(&rs->src_page_req_mutex); + QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req); + qemu_mutex_unlock(&rs->src_page_req_mutex); rcu_read_unlock(); return 0; @@ -1243,51 +1285,43 @@ err: } /** - * ram_save_target_page: Save one target page + * ram_save_target_page: save one target page * + * Returns the number of pages written * - * @f: QEMUFile where to send the data - * @block: pointer to block that contains the page we
want to send - * @offset: offset inside the block for the page; + * @rs: current RAM state + * @ms: current migration state + * @pss: data about the page we want to send * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space - * - * Returns: Number of pages written. */ -static int ram_save_target_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, - bool last_stage, - uint64_t *bytes_transferred, - ram_addr_t dirty_ram_abs) +static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int res = 0; /* Check the pages is dirty and if it is send it */ - if (migration_bitmap_clear_dirty(dirty_ram_abs)) { + if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { unsigned long *unsentmap; - if (compression_switch && migrate_use_compression()) { - res = ram_save_compressed_page(ms, f, pss, - last_stage, - bytes_transferred); + /* + * If xbzrle is on, stop using the data compression after first + * round of migration even if compression is enabled. In theory, + * xbzrle can do better than compression. + */ + unsigned long page = + (pss->block->offset >> TARGET_PAGE_BITS) + pss->page; + if (migrate_use_compression() + && (rs->ram_bulk_stage || !migrate_use_xbzrle())) { + res = ram_save_compressed_page(rs, pss, last_stage); } else { - res = ram_save_page(ms, f, pss, last_stage, - bytes_transferred); + res = ram_save_page(rs, pss, last_stage); } if (res < 0) { return res; } - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (unsentmap) { - clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap); - } - /* Only update last_sent_block if a block was actually sent; xbzrle - * might have decided the page was identical so didn't bother writing - * to the stream. 
- */ - if (res > 0) { - last_sent_block = pss->block; + clear_bit(page, unsentmap); } } @@ -1295,83 +1329,70 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, } /** - * ram_save_host_page: Starting at *offset send pages up to the end - * of the current host page. It's valid for the initial - * offset to point into the middle of a host page - * in which case the remainder of the hostpage is sent. - * Only dirty target pages are sent. - * Note that the host page size may be a huge page for this - * block. + * ram_save_host_page: save a whole host page * - * Returns: Number of pages written. + * Starting at *offset send pages up to the end of the current host + * page. It's valid for the initial offset to point into the middle of + * a host page in which case the remainder of the hostpage is sent. + * Only dirty target pages are sent. Note that the host page size may + * be a huge page for this block. * - * @f: QEMUFile where to send the data - * @block: pointer to block that contains the page we want to send - * @offset: offset inside the block for the page; updated to last target page - * sent + * Returns the number of pages written or negative on error + * + * @rs: current RAM state + * @ms: current migration state + * @pss: data about the page we want to send * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes - * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space */ -static int ram_save_host_page(MigrationState *ms, QEMUFile *f, - PageSearchStatus *pss, - bool last_stage, - uint64_t *bytes_transferred, - ram_addr_t dirty_ram_abs) +static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + bool last_stage) { int tmppages, pages = 0; - size_t pagesize = qemu_ram_pagesize(pss->block); + size_t pagesize_bits = + qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; do { - tmppages = ram_save_target_page(ms, f, pss, last_stage, - bytes_transferred, 
dirty_ram_abs); + tmppages = ram_save_target_page(rs, pss, last_stage); if (tmppages < 0) { return tmppages; } pages += tmppages; - pss->offset += TARGET_PAGE_SIZE; - dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (pagesize - 1)); + pss->page++; + } while (pss->page & (pagesize_bits - 1)); /* The offset we leave with is the last one we looked at */ - pss->offset -= TARGET_PAGE_SIZE; + pss->page--; return pages; } /** - * ram_find_and_save_block: Finds a dirty page and sends it to f + * ram_find_and_save_block: finds a dirty page and sends it to f * * Called within an RCU critical section. * - * Returns: The number of pages written - * 0 means no dirty pages + * Returns the number of pages written where zero means no dirty pages * - * @f: QEMUFile where to send the data + * @rs: current RAM state * @last_stage: if we are at the completion stage - * @bytes_transferred: increase it with the number of transferred bytes * * On systems where host-page-size > target-page-size it will send all the * pages in a host page that are dirty. 
*/ -static int ram_find_and_save_block(QEMUFile *f, bool last_stage, - uint64_t *bytes_transferred) +static int ram_find_and_save_block(RAMState *rs, bool last_stage) { PageSearchStatus pss; - MigrationState *ms = migrate_get_current(); int pages = 0; bool again, found; - ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in - ram_addr_t space */ /* No dirty page as there is zero RAM */ if (!ram_bytes_total()) { return pages; } - pss.block = last_seen_block; - pss.offset = last_offset; + pss.block = rs->last_seen_block; + pss.page = rs->last_page; pss.complete_round = false; if (!pss.block) { @@ -1380,22 +1401,20 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, do { again = true; - found = get_queued_page(ms, &pss, &dirty_ram_abs); + found = get_queued_page(rs, &pss); if (!found) { /* priority queue empty, so just search for something dirty */ - found = find_dirty_block(f, &pss, &again, &dirty_ram_abs); + found = find_dirty_block(rs, &pss, &again); } if (found) { - pages = ram_save_host_page(ms, f, &pss, - last_stage, bytes_transferred, - dirty_ram_abs); + pages = ram_save_host_page(rs, &pss, last_stage); } } while (!pages && again); - last_seen_block = pss.block; - last_offset = pss.offset; + rs->last_seen_block = pss.block; + rs->last_page = pss.page; return pages; } @@ -1403,30 +1422,17 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, void acct_update_position(QEMUFile *f, size_t size, bool zero) { uint64_t pages = size / TARGET_PAGE_SIZE; + RAMState *rs = &ram_state; + if (zero) { - acct_info.dup_pages += pages; + rs->zero_pages += pages; } else { - acct_info.norm_pages += pages; - bytes_transferred += size; + rs->norm_pages += pages; + rs->bytes_transferred += size; qemu_update_position(f, size); } } -static ram_addr_t ram_save_remaining(void) -{ - return migration_dirty_pages; -} - -uint64_t ram_bytes_remaining(void) -{ - return ram_save_remaining() * TARGET_PAGE_SIZE; -} - -uint64_t 
ram_bytes_transferred(void) -{ - return bytes_transferred; -} - uint64_t ram_bytes_total(void) { RAMBlock *block; @@ -1445,7 +1451,7 @@ void free_xbzrle_decoded_buf(void) xbzrle_decoded_buf = NULL; } -static void migration_bitmap_free(struct BitmapRcu *bmap) +static void migration_bitmap_free(RAMBitmap *bmap) { g_free(bmap->bmap); g_free(bmap->unsentmap); @@ -1454,11 +1460,13 @@ static void migration_bitmap_free(struct BitmapRcu *bmap) static void ram_migration_cleanup(void *opaque) { + RAMState *rs = opaque; + /* caller have hold iothread lock or is in a bh, so there is * no writing race against this migration_bitmap */ - struct BitmapRcu *bitmap = migration_bitmap_rcu; - atomic_rcu_set(&migration_bitmap_rcu, NULL); + RAMBitmap *bitmap = rs->ram_bitmap; + atomic_rcu_set(&rs->ram_bitmap, NULL); if (bitmap) { memory_global_dirty_log_stop(); call_rcu(bitmap, migration_bitmap_free, rcu); @@ -1477,49 +1485,17 @@ static void ram_migration_cleanup(void *opaque) XBZRLE_cache_unlock(); } -static void reset_ram_globals(void) +static void ram_state_reset(RAMState *rs) { - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; - last_version = ram_list.version; - ram_bulk_stage = true; + rs->last_seen_block = NULL; + rs->last_sent_block = NULL; + rs->last_page = 0; + rs->last_version = ram_list.version; + rs->ram_bulk_stage = true; } #define MAX_WAIT 50 /* ms, half buffered_file limit */ -void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) -{ - /* called in qemu main thread, so there is - * no writing race against this migration_bitmap - */ - if (migration_bitmap_rcu) { - struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap; - bitmap = g_new(struct BitmapRcu, 1); - bitmap->bmap = bitmap_new(new); - - /* prevent migration_bitmap content from being set bit - * by migration_bitmap_sync_range() at the same time. - * it is safe to migration if migration_bitmap is cleared bit - * at the same time. 
- */ - qemu_mutex_lock(&migration_bitmap_mutex); - bitmap_copy(bitmap->bmap, old_bitmap->bmap, old); - bitmap_set(bitmap->bmap, old, new - old); - - /* We don't have a way to safely extend the sentmap - * with RCU; so mark it as missing, entry to postcopy - * will fail. - */ - bitmap->unsentmap = NULL; - - atomic_rcu_set(&migration_bitmap_rcu, bitmap); - qemu_mutex_unlock(&migration_bitmap_mutex); - migration_dirty_pages += new - old; - call_rcu(old_bitmap, migration_bitmap_free, rcu); - } -} - /* * 'expected' is the value you expect the bitmap mostly to be full * of; it won't bother printing lines that are all this value. @@ -1527,14 +1503,14 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new) */ void ram_debug_dump_bitmap(unsigned long *todump, bool expected) { - int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; - + unsigned long ram_pages = last_ram_page(); + RAMState *rs = &ram_state; int64_t cur; int64_t linelen = 128; char linebuf[129]; if (!todump) { - todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + todump = atomic_rcu_read(&rs->ram_bitmap)->bmap; } for (cur = 0; cur < ram_pages; cur += linelen) { @@ -1563,8 +1539,9 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected) void ram_postcopy_migrated_memory_release(MigrationState *ms) { + RAMState *rs = &ram_state; struct RAMBlock *block; - unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; + unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { unsigned long first = block->offset >> TARGET_PAGE_BITS; @@ -1573,30 +1550,38 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) while (run_start < range) { unsigned long run_end = find_next_bit(bitmap, range, run_start + 1); - ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS, + ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS, (run_end - run_start) << TARGET_PAGE_BITS); run_start = 
find_next_zero_bit(bitmap, range, run_end + 1); } } } -/* +/** + * postcopy_send_discard_bm_ram: discard a RAMBlock + * + * Returns zero on success + * * Callback from postcopy_each_ram_send_discard for each RAMBlock * Note: At this point the 'unsentmap' is the processed bitmap combined * with the dirtymap; so a '1' means it's either dirty or unsent. - * start,length: Indexes into the bitmap for the first bit - * representing the named block and length in target-pages + * + * @ms: current migration state + * @pds: state for postcopy + * @start: RAMBlock starting page + * @length: RAMBlock size */ static int postcopy_send_discard_bm_ram(MigrationState *ms, PostcopyDiscardState *pds, unsigned long start, unsigned long length) { + RAMState *rs = &ram_state; unsigned long end = start + length; /* one after the end */ unsigned long current; unsigned long *unsentmap; - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; for (current = start; current < end; ) { unsigned long one = find_next_bit(unsentmap, end, current); @@ -1621,13 +1606,18 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, return 0; } -/* +/** + * postcopy_each_ram_send_discard: discard all RAMBlocks + * + * Returns 0 for success or negative for error + * * Utility for the outgoing postcopy code. * Calls postcopy_send_discard_bm_ram for each RAMBlock * passing it bitmap indexes and name. - * Returns: 0 on success * (qemu_ram_foreach_block ends up passing unscaled lengths * which would mean postcopy code would have to deal with target page) + * + * @ms: current migration state */ static int postcopy_each_ram_send_discard(MigrationState *ms) { @@ -1656,22 +1646,27 @@ static int postcopy_each_ram_send_discard(MigrationState *ms) return 0; } -/* - * Helper for postcopy_chunk_hostpages; it's called twice to cleanup - * the two bitmaps, that are similar, but one is inverted. 
+/** + * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages + * + * Helper for postcopy_chunk_hostpages; it's called twice to + * canonicalize the two bitmaps, that are similar, but one is + * inverted. * - * We search for runs of target-pages that don't start or end on a - * host page boundary; - * unsent_pass=true: Cleans up partially unsent host pages by searching - * the unsentmap - * unsent_pass=false: Cleans up partially dirty host pages by searching - * the main migration bitmap + * Postcopy requires that all target pages in a hostpage are dirty or + * clean, not a mix. This function canonicalizes the bitmaps. * + * @ms: current migration state + * @unsent_pass: if true we need to canonicalize partially unsent host pages + * otherwise we need to canonicalize partially dirty host pages + * @block: block that contains the page we want to canonicalize + * @pds: state for postcopy */ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, RAMBlock *block, PostcopyDiscardState *pds) { + RAMState *rs = &ram_state; unsigned long *bitmap; unsigned long *unsentmap; unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; @@ -1685,8 +1680,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, return; } - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (unsent_pass) { /* Find a sent page */ @@ -1769,7 +1764,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Remark them as dirty, updating the count for any pages * that weren't previously dirty.
*/ - migration_dirty_pages += !test_and_set_bit(page, bitmap); + rs->migration_dirty_pages += !test_and_set_bit(page, bitmap); } } @@ -1784,23 +1779,28 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, } } -/* +/** + * postcopy_chunk_hostpages: discard any partially sent host page + * * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially * dirty host-page size chunks as all dirty. In this case the host-page * is the host-page for the particular RAMBlock, i.e. it might be a huge page * - * Returns: 0 on success + * Returns zero on success + * + * @ms: current migration state */ static int postcopy_chunk_hostpages(MigrationState *ms) { + RAMState *rs = &ram_state; struct RAMBlock *block; /* Easiest way to make sure we don't resume in the middle of a host-page */ - last_seen_block = NULL; - last_sent_block = NULL; - last_offset = 0; + rs->last_seen_block = NULL; + rs->last_sent_block = NULL; + rs->last_page = 0; QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { unsigned long first = block->offset >> TARGET_PAGE_BITS; @@ -1822,7 +1822,11 @@ static int postcopy_chunk_hostpages(MigrationState *ms) return 0; } -/* +/** + * ram_postcopy_send_discard_bitmap: transmit the discard bitmap + * + * Returns zero on success + * * Transmit the set of pages to be discarded after precopy to the target * these are pages that: * a) Have been previously transmitted but are now dirty again @@ -1830,18 +1834,21 @@ static int postcopy_chunk_hostpages(MigrationState *ms) * any pages on the destination that have been mapped by background * tasks get discarded (transparent huge pages is the specific concern) * Hopefully this is pretty sparse + * + * @ms: current migration state */ int ram_postcopy_send_discard_bitmap(MigrationState *ms) { + RAMState *rs = &ram_state; int ret; unsigned long *bitmap, *unsentmap; rcu_read_lock(); /* This should be our last sync, the src is now paused */ -
migration_bitmap_sync(); + migration_bitmap_sync(rs); - unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; + unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap; if (!unsentmap) { /* We don't have a safe way to resize the sentmap, so * if the bitmap was resized it will be NULL at this @@ -1862,9 +1869,8 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) /* * Update the unsentmap to be unsentmap = unsentmap | dirty */ - bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; - bitmap_or(unsentmap, unsentmap, bitmap, - last_ram_offset() >> TARGET_PAGE_BITS); + bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap; + bitmap_or(unsentmap, unsentmap, bitmap, last_ram_page()); trace_ram_postcopy_send_discard_bitmap(); @@ -1878,28 +1884,27 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return ret; } -/* - * At the start of the postcopy phase of migration, any now-dirty - * precopied pages are discarded. +/** + * ram_discard_range: discard dirtied pages at the beginning of postcopy * - * start, length describe a byte address range within the RAMBlock + * Returns zero on success * - * Returns 0 on success. + * @rbname: name of the RAMBlock of the request. NULL means the + * same that last one. 
+ * @start: RAMBlock starting page + * @length: RAMBlock size */ -int ram_discard_range(MigrationIncomingState *mis, - const char *block_name, - uint64_t start, size_t length) +int ram_discard_range(const char *rbname, uint64_t start, size_t length) { int ret = -1; - trace_ram_discard_range(block_name, start, length); + trace_ram_discard_range(rbname, start, length); rcu_read_lock(); - RAMBlock *rb = qemu_ram_block_by_name(block_name); + RAMBlock *rb = qemu_ram_block_by_name(rbname); if (!rb) { - error_report("ram_discard_range: Failed to find block '%s'", - block_name); + error_report("ram_discard_range: Failed to find block '%s'", rbname); goto err; } @@ -1911,14 +1916,14 @@ err: return ret; } -static int ram_save_init_globals(void) +static int ram_state_init(RAMState *rs) { - int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ + unsigned long ram_bitmap_pages; - dirty_rate_high_cnt = 0; - bitmap_sync_count = 0; - migration_bitmap_sync_init(); - qemu_mutex_init(&migration_bitmap_mutex); + memset(rs, 0, sizeof(*rs)); + qemu_mutex_init(&rs->bitmap_mutex); + qemu_mutex_init(&rs->src_page_req_mutex); + QSIMPLEQ_INIT(&rs->src_page_requests); if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); @@ -1947,8 +1952,6 @@ static int ram_save_init_globals(void) XBZRLE.encoded_buf = NULL; return -1; } - - acct_clear(); } /* For memory_global_dirty_log_start below. 
*/ @@ -1956,19 +1959,18 @@ static int ram_save_init_globals(void) qemu_mutex_lock_ramlist(); rcu_read_lock(); - bytes_transferred = 0; - reset_ram_globals(); + ram_state_reset(rs); - migration_bitmap_rcu = g_new0(struct BitmapRcu, 1); + rs->ram_bitmap = g_new0(RAMBitmap, 1); /* Skip setting bitmap if there is no RAM */ if (ram_bytes_total()) { - ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; - migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages); + ram_bitmap_pages = last_ram_page(); + rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages); + bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages); if (migrate_postcopy_ram()) { - migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages); - bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages); + rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages); + bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages); } } @@ -1976,10 +1978,10 @@ static int ram_save_init_globals(void) * Count the total number of pages used by ram blocks not including any * gaps due to alignment or unplugs. */ - migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; + rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; memory_global_dirty_log_start(); - migration_bitmap_sync(); + migration_bitmap_sync(rs); qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); rcu_read_unlock(); @@ -1987,22 +1989,33 @@ static int ram_save_init_globals(void) return 0; } -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has +/* + * Each of ram_save_setup, ram_save_iterate and ram_save_complete has * long-running RCU critical section. When rcu-reclaims in the code * start to become numerous it will be necessary to reduce the * granularity of these critical sections. 
*/ +/** + * ram_save_setup: Setup RAM for migration + * + * Returns zero to indicate success and negative for error + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_setup(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; RAMBlock *block; /* migration has already setup the bitmap, reuse it. */ if (!migration_in_colo_state()) { - if (ram_save_init_globals() < 0) { + if (ram_state_init(rs) < 0) { return -1; } } + rs->f = f; rcu_read_lock(); @@ -2027,16 +2040,25 @@ static int ram_save_setup(QEMUFile *f, void *opaque) return 0; } +/** + * ram_save_iterate: iterative stage for migration + * + * Returns zero to indicate success and negative for error + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_iterate(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; int ret; int i; int64_t t0; int done = 0; rcu_read_lock(); - if (ram_list.version != last_version) { - reset_ram_globals(); + if (ram_list.version != rs->last_version) { + ram_state_reset(rs); } /* Read version before ram_list.blocks */ @@ -2049,13 +2071,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) while ((ret = qemu_file_rate_limit(f)) == 0) { int pages; - pages = ram_find_and_save_block(f, false, &bytes_transferred); + pages = ram_find_and_save_block(rs, false); /* no more pages to sent */ if (pages == 0) { done = 1; break; } - acct_info.iterations++; + rs->iterations++; /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. 
@@ -2071,7 +2093,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } i++; } - flush_compressed_data(f); + flush_compressed_data(rs); rcu_read_unlock(); /* @@ -2081,7 +2103,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_ROUND); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - bytes_transferred += 8; + rs->bytes_transferred += 8; ret = qemu_file_get_error(f); if (ret < 0) { @@ -2091,13 +2113,24 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return done; } -/* Called with iothread lock */ +/** + * ram_save_complete: function called to send the remaining amount of ram + * + * Returns zero to indicate success + * + * Called with iothread lock + * + * @f: QEMUFile where to send the data + * @opaque: RAMState pointer + */ static int ram_save_complete(QEMUFile *f, void *opaque) { + RAMState *rs = opaque; + rcu_read_lock(); - if (!migration_in_postcopy(migrate_get_current())) { - migration_bitmap_sync(); + if (!migration_in_postcopy()) { + migration_bitmap_sync(rs); } ram_control_before_iterate(f, RAM_CONTROL_FINISH); @@ -2108,15 +2141,14 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int pages; - pages = ram_find_and_save_block(f, !migration_in_colo_state(), - &bytes_transferred); + pages = ram_find_and_save_block(rs, !migration_in_colo_state()); /* no more blocks to sent */ if (pages == 0) { break; } } - flush_compressed_data(f); + flush_compressed_data(rs); ram_control_after_iterate(f, RAM_CONTROL_FINISH); rcu_read_unlock(); @@ -2130,18 +2162,19 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, uint64_t *non_postcopiable_pending, uint64_t *postcopiable_pending) { + RAMState *rs = opaque; uint64_t remaining_size; - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; - if (!migration_in_postcopy(migrate_get_current()) && + if (!migration_in_postcopy() && remaining_size < max_size) { 
qemu_mutex_lock_iothread(); rcu_read_lock(); - migration_bitmap_sync(); + migration_bitmap_sync(rs); rcu_read_unlock(); qemu_mutex_unlock_iothread(); - remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } /* We can do postcopy, and all the data is postcopiable */ @@ -2185,17 +2218,17 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) return 0; } -/* Must be called from within a rcu critical section. +/** + * ram_block_from_stream: read a RAMBlock id from the migration stream + * + * Must be called from within a rcu critical section. + * * Returns a pointer from within the RCU-protected ram_list. - */ -/* - * Read a RAMBlock ID from the stream f. * - * f: Stream to read from - * flags: Page flags (mostly to see if it's a continuation of previous block) + * @f: QEMUFile where to read the data from + * @flags: Page flags (mostly to see if it's a continuation of previous block) */ -static inline RAMBlock *ram_block_from_stream(QEMUFile *f, - int flags) +static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) { static RAMBlock *block = NULL; char id[256]; @@ -2232,9 +2265,15 @@ static inline void *host_from_ram_block_offset(RAMBlock *block, return block->host + offset; } -/* +/** + * ram_handle_compressed: handle the zero page case + * * If a page (or a whole RDMA chunk) has been * determined to be zero, then zap it. + * + * @host: host address for the zero page + * @ch: what the page is filled from. 
We only support zero + * @size: size of the zero page */ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) { @@ -2373,20 +2412,33 @@ static void decompress_data_with_multi_threads(QEMUFile *f, qemu_mutex_unlock(&decomp_done_lock); } -/* - * Allocate data structures etc needed by incoming migration with postcopy-ram - * postcopy-ram's similarly names postcopy_ram_incoming_init does the work +/** + * ram_postcopy_incoming_init: allocate postcopy data structures + * + * Returns 0 for success and negative if there was one error + * + * @mis: current migration incoming state + * + * Allocate data structures etc needed by incoming migration with + * postcopy-ram. postcopy-ram's similarly names + * postcopy_ram_incoming_init does the work. */ int ram_postcopy_incoming_init(MigrationIncomingState *mis) { - size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + unsigned long ram_pages = last_ram_page(); return postcopy_ram_incoming_init(mis, ram_pages); } -/* +/** + * ram_load_postcopy: load a page in postcopy case + * + * Returns 0 for success or -errno in case of error + * * Called in postcopy mode by ram_load(). * rcu_read_lock is taken prior to this being called. + * + * @f: QEMUFile where to send the data */ static int ram_load_postcopy(QEMUFile *f) { @@ -2673,5 +2725,5 @@ static SaveVMHandlers savevm_ram_handlers = { void ram_mig_init(void) { qemu_mutex_init(&XBZRLE.lock); - register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); + register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); } diff --git a/migration/rdma.c b/migration/rdma.c index 674ccab12e..fe0a4b5a83 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -809,7 +809,7 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id) * * Patches are being reviewed on linux-rdma. 
*/ -static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs) +static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) { struct ibv_port_attr port_attr; @@ -950,7 +950,7 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp) RDMA_RESOLVE_TIMEOUT_MS); if (!ret) { if (e->ai_family == AF_INET6) { - ret = qemu_rdma_broken_ipv6_kernel(errp, rdma->cm_id->verbs); + ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, errp); if (ret) { continue; } @@ -2277,7 +2277,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) } -static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all) +static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp) { int ret, idx; Error *local_err = NULL, **temp = &local_err; @@ -2469,7 +2469,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp) continue; } if (e->ai_family == AF_INET6) { - ret = qemu_rdma_broken_ipv6_kernel(errp, listen_id->verbs); + ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, errp); if (ret) { continue; } @@ -3676,8 +3676,8 @@ void rdma_start_outgoing_migration(void *opaque, goto err; } - ret = qemu_rdma_source_init(rdma, errp, - s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]); + ret = qemu_rdma_source_init(rdma, + s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); if (ret) { goto err; diff --git a/migration/savevm.c b/migration/savevm.c index 3b19a4a274..03ae1bdeb4 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -871,7 +871,7 @@ void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; tmp[0] = cpu_to_be64(ram_pagesize_summary()); - tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); + tmp[1] = cpu_to_be64(qemu_target_page_size()); trace_qemu_savevm_send_postcopy_advise(); qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp); @@ -1062,7 +1062,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) static bool 
should_send_vmdesc(void) { MachineState *machine = MACHINE(qdev_get_machine()); - bool in_postcopy = migration_in_postcopy(migrate_get_current()); + bool in_postcopy = migration_in_postcopy(); return !machine->suppress_vmdesc && !in_postcopy; } @@ -1111,7 +1111,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) int vmdesc_len; SaveStateEntry *se; int ret; - bool in_postcopy = migration_in_postcopy(migrate_get_current()); + bool in_postcopy = migration_in_postcopy(); trace_savevm_state_complete_precopy(); @@ -1197,7 +1197,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only) * the result is split into the amount for units that can and * for units that can't do postcopy. */ -void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, +void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size, uint64_t *res_non_postcopiable, uint64_t *res_postcopiable) { @@ -1216,7 +1216,7 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size, continue; } } - se->ops->save_live_pending(f, se->opaque, max_size, + se->ops->save_live_pending(f, se->opaque, threshold_size, res_non_postcopiable, res_postcopiable); } } @@ -1390,13 +1390,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) } remote_tps = qemu_get_be64(mis->from_src_file); - if (remote_tps != (1ul << qemu_target_page_bits())) { + if (remote_tps != qemu_target_page_size()) { /* * Again, some differences could be dealt with, but for now keep it * simple. 
*/ - error_report("Postcopy needs matching target page sizes (s=%d d=%d)", - (int)remote_tps, 1 << qemu_target_page_bits()); + error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", + (int)remote_tps, qemu_target_page_size()); return -1; } @@ -1479,8 +1479,7 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis, block_length = qemu_get_be64(mis->from_src_file); len -= 16; - int ret = ram_discard_range(mis, ramid, start_addr, - block_length); + int ret = ram_discard_range(ramid, start_addr, block_length); if (ret) { return ret; } diff --git a/migration/trace-events b/migration/trace-events index 7372ce2a51..b8f01a218c 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -63,8 +63,8 @@ put_qtailq_end(const char *name, const char *reason) "%s %s" qemu_file_fclose(void) "" # migration/ram.c -get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64 -get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)" +get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/%" PRIx64 " page_abs=%lx" +get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/%" PRIx64 " page_abs=%lx (sent=%d)" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" diff --git a/nbd/client.c b/nbd/client.c index 3dc2564cd0..a58fb02cb4 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -812,6 +812,6 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) LOG("invalid magic (got 0x%" PRIx32 ")", magic); return -EINVAL; } - return 0; + return sizeof(buf); } diff --git a/net/colo-compare.c b/net/colo-compare.c index 54e6d40525..03ddebe5d3 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -233,24 +233,54 @@ static int 
colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) spkt->ip->ip_sum = ppkt->ip->ip_sum; } - if (ptcp->th_sum == stcp->th_sum) { + /* + * Check tcp header length for tcp option field. + * th_off > 5 means this tcp packet have options field. + * The tcp options maybe always different. + * for example: + * From RFC 7323. + * TCP Timestamps option (TSopt): + * Kind: 8 + * + * Length: 10 bytes + * + * +-------+-------+---------------------+---------------------+ + * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)| + * +-------+-------+---------------------+---------------------+ + * 1 1 4 4 + * + * In this case the primary guest's timestamp always different with + * the secondary guest's timestamp. COLO just focus on payload, + * so we just need skip this field. + */ + if (ptcp->th_off > 5) { + ptrdiff_t tcp_offset; + tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data + + (ptcp->th_off * 4); + res = colo_packet_compare_common(ppkt, spkt, tcp_offset); + } else if (ptcp->th_sum == stcp->th_sum) { res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN); } else { res = -1; } - if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { - trace_colo_compare_pkt_info_src(inet_ntoa(ppkt->ip->ip_src), - ntohl(stcp->th_seq), - ntohl(stcp->th_ack), - res, stcp->th_flags, - spkt->size); + if (res && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { + char ip_src[20], ip_dst[20]; - trace_colo_compare_pkt_info_dst(inet_ntoa(ppkt->ip->ip_dst), - ntohl(ptcp->th_seq), - ntohl(ptcp->th_ack), - res, ptcp->th_flags, - ppkt->size); + strcpy(ip_src, inet_ntoa(ppkt->ip->ip_src)); + strcpy(ip_dst, inet_ntoa(ppkt->ip->ip_dst)); + + trace_colo_compare_tcp_info(ip_src, + ip_dst, + ntohl(ptcp->th_seq), + ntohl(stcp->th_seq), + ntohl(ptcp->th_ack), + ntohl(stcp->th_ack), + res, + ptcp->th_flags, + stcp->th_flags, + ppkt->size, + spkt->size); qemu_hexdump((char *)ppkt->data, stderr, "colo-compare ppkt", ppkt->size); @@ -372,10 +402,9 @@ static int 
colo_old_packet_check_one(Packet *pkt, int64_t *check_time) } } -static void colo_old_packet_check_one_conn(void *opaque, - void *user_data) +static int colo_old_packet_check_one_conn(Connection *conn, + void *user_data) { - Connection *conn = opaque; GList *result = NULL; int64_t check_time = REGULAR_PACKET_CHECK_MS; @@ -386,7 +415,10 @@ static void colo_old_packet_check_one_conn(void *opaque, if (result) { /* do checkpoint will flush old packet */ /* TODO: colo_notify_checkpoint();*/ + return 0; } + + return 1; } /* @@ -398,7 +430,12 @@ static void colo_old_packet_check(void *opaque) { CompareState *s = opaque; - g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL); + /* + * If we find one old packet, stop finding job and notify + * COLO frame do checkpoint. + */ + g_queue_find_custom(&s->conn_list, NULL, + (GCompareFunc)colo_old_packet_check_one_conn); } /* diff --git a/net/socket.c b/net/socket.c index fe3547b018..b8c931e762 100644 --- a/net/socket.c +++ b/net/socket.c @@ -578,7 +578,7 @@ static int net_socket_connect_init(NetClientState *peer, goto err; } - fd = socket_connect(c->saddr, &local_error, net_socket_connected, c); + fd = socket_connect(c->saddr, net_socket_connected, c, &local_error); if (fd < 0) { goto err; } diff --git a/net/trace-events b/net/trace-events index 35198bc742..123cb28c63 100644 --- a/net/trace-events +++ b/net/trace-events @@ -13,8 +13,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" colo_old_packet_check_found(int64_t old_time) "%" PRId64 colo_compare_miscompare(void) "" -colo_compare_pkt_info_src(const char *src, uint32_t sseq, uint32_t sack, int res, uint32_t sflag, int ssize) "src/dst: %s s: seq/ack=%u/%u res=%d flags=%x spkt_size: %d\n" -colo_compare_pkt_info_dst(const char *dst, uint32_t dseq, 
uint32_t dack, int res, uint32_t dflag, int dsize) "src/dst: %s d: seq/ack=%u/%u res=%d flags=%x dpkt_size: %d\n" +colo_compare_tcp_info(const char *src, const char *dst, uint32_t pseq, uint32_t sseq, uint32_t pack, uint32_t sack, int res, uint32_t pflag, uint32_t sflag, int psize, int ssize) "src/dst: %s/%s pseq/sseq:%u/%u pack/sack:%u/%u res=%d pflags/sflag:%x/%x psize/ssize:%d/%d \n" # net/filter-rewriter.c colo_filter_rewriter_debug(void) "" @@ -338,12 +338,12 @@ void parse_numa_opts(MachineClass *mc) if (i == nb_numa_nodes) { uint64_t usedmem = 0; - /* On Linux, each node's border has to be 8MB aligned, - * the final node gets the rest. + /* Align each node according to the alignment + * requirements of the machine class */ for (i = 0; i < nb_numa_nodes - 1; i++) { numa_info[i].node_mem = (ram_size / nb_numa_nodes) & - ~((1 << 23UL) - 1); + ~((1 << mc->numa_mem_align_shift) - 1); usedmem += numa_info[i].node_mem; } numa_info[i].node_mem = ram_size - usedmem; diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex cfe0aee7e5..ed9a51e738 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex d6244b9ebd..1cf43dbe1d 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex 74f67f2724..281d20d604 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 diff --git a/qapi-schema.json b/qapi-schema.json index 68a43274bf..01b087fa16 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -598,6 +598,9 @@ # @postcopy-requests: The number of page requests received from the destination # (since 2.7) # +# @page-size: The number of bytes per page for the various page-based +# statistics (since 2.10) +# # Since: 0.14.0 ## { 'struct': 'MigrationStats', @@ -605,7 +608,7 @@ 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', 'normal-bytes': 
'int', 'dirty-pages-rate' : 'int', 'mbps' : 'number', 'dirty-sync-count' : 'int', - 'postcopy-requests' : 'int' } } + 'postcopy-requests' : 'int', 'page-size' : 'int' } } ## # @XBZRLECacheStats: @@ -4051,19 +4054,27 @@ 'data': [ 'all', 'rx', 'tx' ] } ## -# @InetSocketAddress: -# -# Captures a socket address or address range in the Internet namespace. +# @InetSocketAddressBase: # # @host: host part of the address +# @port: port part of the address +## +{ 'struct': 'InetSocketAddressBase', + 'data': { + 'host': 'str', + 'port': 'str' } } + +## +# @InetSocketAddress: # -# @port: port part of the address, or lowest port if @to is present +# Captures a socket address or address range in the Internet namespace. # # @numeric: true if the host/port are guaranteed to be numeric, # false if name resolution should be attempted. Defaults to false. # (Since 2.9) # -# @to: highest port to try +# @to: If present, this is range of possible addresses, with port +# between @port and @to. # # @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6 # @@ -4072,9 +4083,8 @@ # Since: 1.3 ## { 'struct': 'InetSocketAddress', + 'base': 'InetSocketAddressBase', 'data': { - 'host': 'str', - 'port': 'str', '*numeric': 'bool', '*to': 'uint16', '*ipv4': 'bool', @@ -4137,7 +4147,7 @@ # Since: 2.9 ## { 'enum': 'SocketAddressFlatType', - 'data': [ 'unix', 'inet' ] } + 'data': [ 'inet', 'unix', 'vsock', 'fd' ] } ## # @SocketAddressFlat: @@ -4146,22 +4156,19 @@ # # @type: Transport type # -# This is similar to SocketAddress, only distinction: -# -# 1. SocketAddressFlat is a flat union, SocketAddress is a simple union. -# A flat union is nicer than simple because it avoids nesting -# (i.e. more {}) on the wire. -# -# 2. SocketAddressFlat supports only types 'unix' and 'inet', because -# that's what its current users need. +# This is just like SocketAddress, except it's a flat union rather +# than a simple union. Nicer because it avoids nesting on the wire, +# i.e. 
this form has fewer {}. # # Since: 2.9 ## { 'union': 'SocketAddressFlat', 'base': { 'type': 'SocketAddressFlatType' }, 'discriminator': 'type', - 'data': { 'unix': 'UnixSocketAddress', - 'inet': 'InetSocketAddress' } } + 'data': { 'inet': 'InetSocketAddress', + 'unix': 'UnixSocketAddress', + 'vsock': 'VsockSocketAddress', + 'fd': 'String' } } ## # @getfd: diff --git a/qapi/block-core.json b/qapi/block-core.json index 0f132fc995..87fb747ab6 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2053,7 +2053,7 @@ # @ignore: Ignore the request # @unmap: Forward as an unmap request # -# Since: 1.7 +# Since: 2.9 ## { 'enum': 'BlockdevDiscardOptions', 'data': [ 'ignore', 'unmap' ] } @@ -2082,7 +2082,7 @@ # @threads: Use qemu's thread pool # @native: Use native AIO backend (only Linux and Windows) # -# Since: 1.7 +# Since: 2.9 ## { 'enum': 'BlockdevAioOptions', 'data': [ 'threads', 'native' ] } @@ -2097,7 +2097,7 @@ # @no-flush: ignore any flush requests for the device (default: # false) # -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevCacheOptions', 'data': { '*direct': 'bool', @@ -2108,18 +2108,9 @@ # # Drivers that are supported in block device operations. 
# -# @host_device: Since 2.1 -# @host_cdrom: Since 2.1 -# @gluster: Since 2.7 -# @nbd: Since 2.8 -# @nfs: Since 2.8 -# @replication: Since 2.8 -# @ssh: Since 2.8 -# @iscsi: Since 2.9 -# @rbd: Since 2.9 -# @sheepdog: Since 2.9 +# @vxhs: Since 2.10 # -# Since: 2.0 +# Since: 2.9 ## { 'enum': 'BlockdevDriver', 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', @@ -2127,7 +2118,7 @@ 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', - 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } + 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } ## # @BlockdevOptionsFile: @@ -2137,7 +2128,7 @@ # @filename: path to the image file # @aio: AIO backend (default: threads) (since: 2.8) # -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsFile', 'data': { 'filename': 'str', @@ -2153,7 +2144,7 @@ # requests. Default to zero which completes requests immediately. # (Since 2.4) # -# Since: 2.2 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsNull', 'data': { '*size': 'int', '*latency-ns': 'uint64' } } @@ -2173,7 +2164,7 @@ # (since 2.4) # @rw: whether to allow write operations (default: false) # -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsVVFAT', 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool', @@ -2187,7 +2178,7 @@ # # @file: reference to or definition of the data source block device # -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsGenericFormat', 'data': { 'file': 'BlockdevRef' } } @@ -2201,7 +2192,7 @@ # the decryption key (since 2.6). Mandatory except when # doing a metadata-only probe of the image. # -# Since: 2.6 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsLUKS', 'base': 'BlockdevOptionsGenericFormat', @@ -2219,7 +2210,7 @@ # allowed to pass an empty string here in order to disable the # default backing file. 
# -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsGenericCOWFormat', 'base': 'BlockdevOptionsGenericFormat', @@ -2240,7 +2231,7 @@ # # @all: Perform all available overlap checks # -# Since: 2.2 +# Since: 2.9 ## { 'enum': 'Qcow2OverlapCheckMode', 'data': [ 'none', 'constant', 'cached', 'all' ] } @@ -2255,7 +2246,7 @@ # @template: Specifies a template mode which can be adjusted using the other # flags, defaults to 'cached' # -# Since: 2.2 +# Since: 2.9 ## { 'struct': 'Qcow2OverlapCheckFlags', 'data': { '*template': 'Qcow2OverlapCheckMode', @@ -2279,7 +2270,7 @@ # # @mode: named mode which chooses a specific set of flags # -# Since: 2.2 +# Since: 2.9 ## { 'alternate': 'Qcow2OverlapChecks', 'data': { 'flags': 'Qcow2OverlapCheckFlags', @@ -2320,7 +2311,7 @@ # caches. The interval is in seconds. The default value # is 0 and it disables this feature (since 2.5) # -# Since: 1.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsQcow2', 'base': 'BlockdevOptionsGenericCOWFormat', @@ -2347,7 +2338,7 @@ # # TODO: Expose the host_key_check option in QMP # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsSsh', 'data': { 'server': 'InetSocketAddress', @@ -2360,7 +2351,7 @@ # # Trigger events supported by blkdebug. 
# -# Since: 2.0 +# Since: 2.9 ## { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG', 'data': [ 'l1_update', 'l1_grow_alloc_table', 'l1_grow_write_table', @@ -2400,7 +2391,7 @@ # # @immediately: fail immediately; defaults to false # -# Since: 2.0 +# Since: 2.9 ## { 'struct': 'BlkdebugInjectErrorOptions', 'data': { 'event': 'BlkdebugEvent', @@ -2423,7 +2414,7 @@ # @new_state: the state identifier blkdebug is supposed to assume if # this event is triggered # -# Since: 2.0 +# Since: 2.9 ## { 'struct': 'BlkdebugSetStateOptions', 'data': { 'event': 'BlkdebugEvent', @@ -2446,7 +2437,7 @@ # # @set-state: array of state-change descriptions # -# Since: 2.0 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsBlkdebug', 'data': { 'image': 'BlockdevRef', @@ -2464,7 +2455,7 @@ # # @raw: raw image used for verification # -# Since: 2.0 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsBlkverify', 'data': { 'test': 'BlockdevRef', @@ -2479,7 +2470,7 @@ # # @fifo: read only from the first child that has not failed # -# Since: 2.2 +# Since: 2.9 ## { 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] } @@ -2501,7 +2492,7 @@ # @read-pattern: choose read pattern and set to quorum by default # (Since 2.2) # -# Since: 2.0 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsQuorum', 'data': { '*blkverify': 'bool', @@ -2526,7 +2517,7 @@ # # @logfile: libgfapi log file (default /dev/stderr) (Since 2.8) # -# Since: 2.7 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsGluster', 'data': { 'volume': 'str', @@ -2601,27 +2592,6 @@ ## -# @RbdAuthSupport: -# -# An enumeration of RBD auth support -# -# Since: 2.9 -## -{ 'enum': 'RbdAuthSupport', - 'data': [ 'cephx', 'none' ] } - - -## -# @RbdAuthMethod: -# -# An enumeration of rados auth_supported types -# -# Since: 2.9 -## -{ 'struct': 'RbdAuthMethod', - 'data': { 'auth': 'RbdAuthSupport' } } - -## # @BlockdevOptionsRbd: # # @pool: Ceph pool name. @@ -2639,11 +2609,6 @@ # @server: Monitor host address and port. This maps # to the "mon_host" Ceph option. 
# -# @auth-supported: Authentication supported. -# -# @password-secret: The ID of a QCryptoSecret object providing -# the password for the login. -# # Since: 2.9 ## { 'struct': 'BlockdevOptionsRbd', @@ -2652,9 +2617,7 @@ '*conf': 'str', '*snapshot': 'str', '*user': 'str', - '*server': ['InetSocketAddress'], - '*auth-supported': ['RbdAuthMethod'], - '*password-secret': 'str' } } + '*server': ['InetSocketAddressBase'] } } ## # @BlockdevOptionsSheepdog: @@ -2662,7 +2625,7 @@ # Driver specific block device options for sheepdog # # @vdi: Virtual disk image name -# @addr: The Sheepdog server to connect to +# @server: The Sheepdog server to connect to # @snap-id: Snapshot ID # @tag: Snapshot tag name # @@ -2671,7 +2634,7 @@ # Since: 2.9 ## { 'struct': 'BlockdevOptionsSheepdog', - 'data': { 'addr': 'SocketAddressFlat', + 'data': { 'server': 'SocketAddressFlat', 'vdi': 'str', '*snap-id': 'uint32', '*tag': 'str' } } @@ -2685,7 +2648,7 @@ # # @secondary: Secondary mode, receive the vm's state from primary QEMU. # -# Since: 2.8 +# Since: 2.9 ## { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] } @@ -2700,7 +2663,7 @@ # node who owns the replication node chain. Must not be given in # primary mode. # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsReplication', 'base': 'BlockdevOptionsGenericFormat', @@ -2714,7 +2677,7 @@ # # @inet: TCP transport # -# Since: 2.8 +# Since: 2.9 ## { 'enum': 'NFSTransport', 'data': [ 'inet' ] } @@ -2728,7 +2691,7 @@ # # @host: host address for NFS server # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'NFSServer', 'data': { 'type': 'NFSTransport', @@ -2763,7 +2726,7 @@ # @debug: set the NFS debug level (max 2) (defaults # to libnfs default) # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsNfs', 'data': { 'server': 'NFSServer', @@ -2776,16 +2739,101 @@ '*debug': 'int' } } ## -# @BlockdevOptionsCurl: +# @BlockdevOptionsCurlBase: # -# Driver specific block device options for the curl backend. 
+# Driver specific block device options shared by all protocols supported by the +# curl backend. # -# @filename: path to the image file +# @url: URL of the image file # -# Since: 1.7 +# @readahead: Size of the read-ahead cache; must be a multiple of +# 512 (defaults to 256 kB) +# +# @timeout: Timeout for connections, in seconds (defaults to 5) +# +# @username: Username for authentication (defaults to none) +# +# @password-secret: ID of a QCryptoSecret object providing a password +# for authentication (defaults to no password) +# +# @proxy-username: Username for proxy authentication (defaults to none) +# +# @proxy-password-secret: ID of a QCryptoSecret object providing a password +# for proxy authentication (defaults to no password) +# +# Since: 2.9 ## -{ 'struct': 'BlockdevOptionsCurl', - 'data': { 'filename': 'str' } } +{ 'struct': 'BlockdevOptionsCurlBase', + 'data': { 'url': 'str', + '*readahead': 'int', + '*timeout': 'int', + '*username': 'str', + '*password-secret': 'str', + '*proxy-username': 'str', + '*proxy-password-secret': 'str' } } + +## +# @BlockdevOptionsCurlHttp: +# +# Driver specific block device options for HTTP connections over the curl +# backend. URLs must start with "http://". +# +# @cookie: List of cookies to set; format is +# "name1=content1; name2=content2;" as explained by +# CURLOPT_COOKIE(3). Defaults to no cookies. +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlHttp', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*cookie': 'str' } } + +## +# @BlockdevOptionsCurlHttps: +# +# Driver specific block device options for HTTPS connections over the curl +# backend. URLs must start with "https://". +# +# @cookie: List of cookies to set; format is +# "name1=content1; name2=content2;" as explained by +# CURLOPT_COOKIE(3). Defaults to no cookies. 
+# +# @sslverify: Whether to verify the SSL certificate's validity (defaults to +# true) +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlHttps', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*cookie': 'str', + '*sslverify': 'bool' } } + +## +# @BlockdevOptionsCurlFtp: +# +# Driver specific block device options for FTP connections over the curl +# backend. URLs must start with "ftp://". +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlFtp', + 'base': 'BlockdevOptionsCurlBase', + 'data': { } } + +## +# @BlockdevOptionsCurlFtps: +# +# Driver specific block device options for FTPS connections over the curl +# backend. URLs must start with "ftps://". +# +# @sslverify: Whether to verify the SSL certificate's validity (defaults to +# true) +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlFtps', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*sslverify': 'bool' } } ## # @BlockdevOptionsNbd: @@ -2798,10 +2846,10 @@ # # @tls-creds: TLS credentials ID # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsNbd', - 'data': { 'server': 'SocketAddress', + 'data': { 'server': 'SocketAddressFlat', '*export': 'str', '*tls-creds': 'str' } } @@ -2813,13 +2861,29 @@ # @offset: position where the block device starts # @size: the assumed size of the device # -# Since: 2.8 +# Since: 2.9 ## { 'struct': 'BlockdevOptionsRaw', 'base': 'BlockdevOptionsGenericFormat', 'data': { '*offset': 'int', '*size': 'int' } } ## +# @BlockdevOptionsVxHS: +# +# Driver specific block device options for VxHS +# +# @vdisk-id: UUID of VxHS volume +# @server: vxhs server IP, port +# @tls-creds: TLS credentials ID +# +# Since: 2.10 +## +{ 'struct': 'BlockdevOptionsVxHS', + 'data': { 'vdisk-id': 'str', + 'server': 'InetSocketAddressBase', + '*tls-creds': 'str' } } + +## # @BlockdevOptions: # # Options for creating a block device. Many options are available for all @@ -2837,7 +2901,7 @@ # # Remaining options are determined by the block driver. 
# -# Since: 1.7 +# Since: 2.9 ## { 'union': 'BlockdevOptions', 'base': { 'driver': 'BlockdevDriver', @@ -2854,13 +2918,13 @@ 'cloop': 'BlockdevOptionsGenericFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsCurl', - 'ftps': 'BlockdevOptionsCurl', + 'ftp': 'BlockdevOptionsCurlFtp', + 'ftps': 'BlockdevOptionsCurlFtps', 'gluster': 'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsCurl', - 'https': 'BlockdevOptionsCurl', + 'http': 'BlockdevOptionsCurlHttp', + 'https': 'BlockdevOptionsCurlHttps', 'iscsi': 'BlockdevOptionsIscsi', 'luks': 'BlockdevOptionsLUKS', 'nbd': 'BlockdevOptionsNbd', @@ -2881,7 +2945,8 @@ 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', 'vpc': 'BlockdevOptionsGenericFormat', - 'vvfat': 'BlockdevOptionsVVFAT' + 'vvfat': 'BlockdevOptionsVVFAT', + 'vxhs': 'BlockdevOptionsVxHS' } } ## @@ -2894,7 +2959,7 @@ # empty string means that no block device should be # referenced. # -# Since: 1.7 +# Since: 2.9 ## { 'alternate': 'BlockdevRef', 'data': { 'definition': 'BlockdevOptions', @@ -2907,11 +2972,7 @@ # BlockBackend will be created; otherwise, @node-name is mandatory at the top # level and no BlockBackend will be created. # -# Note: This command is still a work in progress. It doesn't support all -# block drivers among other things. Stay away from it unless you want -# to help with its development. -# -# Since: 1.7 +# Since: 2.9 # # Example: # @@ -2957,7 +3018,7 @@ { 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true } ## -# @x-blockdev-del: +# @blockdev-del: # # Deletes a block device that has been added using blockdev-add. # The command will fail if the node is attached to a device or is @@ -2965,11 +3026,7 @@ # # @node-name: Name of the graph node to delete. # -# Note: This command is still a work in progress and is considered -# experimental. 
Stay away from it unless you want to help with its -# development. -# -# Since: 2.5 +# Since: 2.9 # # Example: # @@ -2985,13 +3042,13 @@ # } # <- { "return": {} } # -# -> { "execute": "x-blockdev-del", +# -> { "execute": "blockdev-del", # "arguments": { "node-name": "node0" } # } # <- { "return": {} } # ## -{ 'command': 'x-blockdev-del', 'data': { 'node-name': 'str' } } +{ 'command': 'blockdev-del', 'data': { 'node-name': 'str' } } ## # @blockdev-open-tray: diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c index 026d25b767..324b197495 100644 --- a/qapi/opts-visitor.c +++ b/qapi/opts-visitor.c @@ -164,7 +164,7 @@ opts_check_struct(Visitor *v, Error **errp) GHashTableIter iter; GQueue *any; - if (ov->depth > 0) { + if (ov->depth > 1) { return; } @@ -276,8 +276,8 @@ static void opts_check_list(Visitor *v, Error **errp) { /* - * FIXME should set error when unvisited elements remain. Mostly - * harmless, as the generated visits always visit all elements. + * Unvisited list elements will be reported later when checking + * whether unvisited struct members remain. */ } diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c index 806b01ae3a..c089491c24 100644 --- a/qapi/string-input-visitor.c +++ b/qapi/string-input-visitor.c @@ -54,6 +54,10 @@ static int parse_str(StringInputVisitor *siv, const char *name, Error **errp) return 0; } + if (!*str) { + return 0; + } + do { errno = 0; start = strtoll(str, &endptr, 0); diff --git a/qdev-monitor.c b/qdev-monitor.c index 5f2fcdfc45..e61d596ef7 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -29,6 +29,7 @@ #include "qemu/error-report.h" #include "qemu/help_option.h" #include "sysemu/block-backend.h" +#include "migration/migration.h" /* * Aliases were a bad idea from the start. 
Let's keep them @@ -603,6 +604,11 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) return NULL; } + if (!migration_is_idle()) { + error_setg(errp, "device_add not allowed while migrating"); + return NULL; + } + /* create device */ dev = DEVICE(object_new(driver)); @@ -836,6 +842,45 @@ static DeviceState *find_device_state(const char *id, Error **errp) return DEVICE(obj); } +void qdev_unplug(DeviceState *dev, Error **errp) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + HotplugHandler *hotplug_ctrl; + HotplugHandlerClass *hdc; + + if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name); + return; + } + + if (!dc->hotpluggable) { + error_setg(errp, QERR_DEVICE_NO_HOTPLUG, + object_get_typename(OBJECT(dev))); + return; + } + + if (!migration_is_idle()) { + error_setg(errp, "device_del not allowed while migrating"); + return; + } + + qdev_hot_removed = true; + + hotplug_ctrl = qdev_get_hotplug_handler(dev); + /* hotpluggable device MUST have HotplugHandler, if it doesn't + * then something is very wrong with it */ + g_assert(hotplug_ctrl); + + /* If device supports async unplug just request it to be done, + * otherwise just remove it synchronously */ + hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl); + if (hdc->unplug_request) { + hotplug_handler_unplug_request(hotplug_ctrl, dev, errp); + } else { + hotplug_handler_unplug(hotplug_ctrl, dev, errp); + } +} + void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 9c9702cc62..8ac78222af 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,9 +22,9 @@ STEXI ETEXI DEF("create", img_create, - "create [-q] [--object objectdef] [--image-opts] [-f fmt] [-o options] filename [size]") + "create [-q] [--object objectdef] [-f fmt] [-o options] filename [size]") STEXI -@item create [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-o 
@var{options}] @var{filename} [@var{size}] +@item create [--object @var{objectdef}] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}] ETEXI DEF("commit", img_commit, diff --git a/qemu-img.c b/qemu-img.c index 98b836b030..bbe15741f1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -88,6 +88,16 @@ static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...) exit(EXIT_FAILURE); } +static void QEMU_NORETURN missing_argument(const char *option) +{ + error_exit("missing argument for option '%s'", option); +} + +static void QEMU_NORETURN unrecognized_option(const char *option) +{ + error_exit("unrecognized option '%s'", option); +} + /* Please keep in synch with qemu-img.texi */ static void QEMU_NORETURN help(void) { @@ -406,13 +416,18 @@ static int img_create(int argc, char **argv) {"object", required_argument, 0, OPTION_OBJECT}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "F:b:f:he6o:q", + c = getopt_long(argc, argv, ":F:b:f:he6o:q", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -501,7 +516,7 @@ static int img_create(int argc, char **argv) } bdrv_img_create(filename, fmt, base_filename, base_fmt, - options, img_size, 0, &local_err, quiet); + options, img_size, 0, quiet, &local_err); if (local_err) { error_reportf_err(local_err, "%s: ", filename); goto fail; @@ -651,13 +666,18 @@ static int img_check(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:r:T:q", + c = getopt_long(argc, argv, ":hf:r:T:q", long_options, &option_index); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -855,13 +875,18 @@ static int img_commit(int argc, char **argv) {"image-opts", no_argument, 0, 
OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "f:ht:b:dpq", + c = getopt_long(argc, argv, ":f:ht:b:dpq", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -959,7 +984,7 @@ static int img_commit(int argc, char **argv) aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, - &cbi, &local_err, false); + &cbi, false, &local_err); aio_context_release(aio_context); if (local_err) { goto done; @@ -1190,13 +1215,18 @@ static int img_compare(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:F:T:pqs", + c = getopt_long(argc, argv, ":hf:F:T:pqs", long_options, NULL); if (c == -1) { break; } switch (c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -1926,13 +1956,18 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", + c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -2502,13 +2537,18 @@ static int img_info(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "f:h", + c = getopt_long(argc, argv, ":f:h", long_options, &option_index); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -2713,13 +2753,18 @@ 
static int img_map(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "f:h", + c = getopt_long(argc, argv, ":f:h", long_options, &option_index); if (c == -1) { break; } switch (c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -2835,13 +2880,18 @@ static int img_snapshot(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "la:c:d:hq", + c = getopt_long(argc, argv, ":la:c:d:hq", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); return 0; @@ -2988,13 +3038,18 @@ static int img_rebase(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:F:b:upt:T:q", + c = getopt_long(argc, argv, ":hf:F:b:upt:T:q", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); return 0; @@ -3355,13 +3410,18 @@ static int img_resize(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "f:hq", + c = getopt_long(argc, argv, ":f:hq", long_options, NULL); if (c == -1) { break; } switch(c) { + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -3493,54 +3553,59 @@ static int img_amend(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "ho:f:t:pq", + c = getopt_long(argc, argv, ":ho:f:t:pq", long_options, NULL); if (c == -1) { break; } switch (c) { - case 'h': - case '?': - help(); - break; - 
case 'o': - if (!is_valid_option_list(optarg)) { - error_report("Invalid option list: %s", optarg); - ret = -1; - goto out_no_progress; - } - if (!options) { - options = g_strdup(optarg); - } else { - char *old_options = options; - options = g_strdup_printf("%s,%s", options, optarg); - g_free(old_options); - } - break; - case 'f': - fmt = optarg; - break; - case 't': - cache = optarg; - break; - case 'p': - progress = true; - break; - case 'q': - quiet = true; - break; - case OPTION_OBJECT: - opts = qemu_opts_parse_noisily(&qemu_object_opts, - optarg, true); - if (!opts) { - ret = -1; - goto out_no_progress; - } - break; - case OPTION_IMAGE_OPTS: - image_opts = true; - break; + case ':': + missing_argument(argv[optind - 1]); + break; + case '?': + unrecognized_option(argv[optind - 1]); + break; + case 'h': + help(); + break; + case 'o': + if (!is_valid_option_list(optarg)) { + error_report("Invalid option list: %s", optarg); + ret = -1; + goto out_no_progress; + } + if (!options) { + options = g_strdup(optarg); + } else { + char *old_options = options; + options = g_strdup_printf("%s,%s", options, optarg); + g_free(old_options); + } + break; + case 'f': + fmt = optarg; + break; + case 't': + cache = optarg; + break; + case 'p': + progress = true; + break; + case 'q': + quiet = true; + break; + case OPTION_OBJECT: + opts = qemu_opts_parse_noisily(&qemu_object_opts, + optarg, true); + if (!opts) { + ret = -1; + goto out_no_progress; + } + break; + case OPTION_IMAGE_OPTS: + image_opts = true; + break; } } @@ -3759,14 +3824,19 @@ static int img_bench(int argc, char **argv) {"no-drain", no_argument, 0, OPTION_NO_DRAIN}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL); + c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:w", long_options, NULL); if (c == -1) { break; } switch (c) { - case 'h': + case ':': + missing_argument(argv[optind - 1]); + break; case '?': + unrecognized_option(argv[optind - 1]); + break; + case 'h': help(); 
break; case 'c': @@ -4093,7 +4163,7 @@ static int img_dd(int argc, char **argv) { 0, 0, 0, 0 } }; - while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) { + while ((c = getopt_long(argc, argv, ":hf:O:", long_options, NULL))) { if (c == EOF) { break; } @@ -4104,10 +4174,12 @@ static int img_dd(int argc, char **argv) case 'f': fmt = optarg; break; + case ':': + missing_argument(argv[optind - 1]); + break; case '?': - error_report("Try 'qemu-img --help' for more information."); - ret = -1; - goto out; + unrecognized_option(argv[optind - 1]); + break; case 'h': help(); break; @@ -4336,8 +4408,14 @@ int main(int argc, char **argv) qemu_add_opts(&qemu_source_opts); qemu_add_opts(&qemu_trace_opts); - while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) { switch (c) { + case ':': + missing_argument(argv[optind - 1]); + return 0; + case '?': + unrecognized_option(argv[optind - 1]); + return 0; case 'h': help(); return 0; diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 2c48f9ce1a..312fc6d157 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -35,6 +35,13 @@ static int compare_cmdname(const void *a, const void *b) void qemuio_add_command(const cmdinfo_t *ci) { + /* ci->perm assumes a file is open, but the GLOBAL and NOFILE_OK + * flags allow it not to be, so that combination is invalid. + * Catch it now rather than letting it manifest as a crash if a + * particular set of command line options are used. 
+ */ + assert(ci->perm == 0 || + (ci->flags & (CMD_FLAG_GLOBAL | CMD_NOFILE_OK)) == 0); cmdtab = g_renew(cmdinfo_t, cmdtab, ++ncmds); cmdtab[ncmds - 1] = *ci; qsort(cmdtab, ncmds, sizeof(*cmdtab), compare_cmdname); @@ -514,7 +521,7 @@ static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, } co = qemu_coroutine_create(co_pwrite_zeroes_entry, &data); - qemu_coroutine_enter(co); + bdrv_coroutine_enter(blk_bs(blk), co); while (!data.done) { aio_poll(blk_get_aio_context(blk), true); } diff --git a/qemu-options.hx b/qemu-options.hx index 2043371260..b9a2463919 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -635,6 +635,30 @@ file sectors into the image file. conversion of plain zero writes by the OS to driver specific optimized zero write commands. You may even choose "unmap" if @var{discard} is set to "unmap" to allow a zero write to be converted to an UNMAP operation. +@item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w} +Specify bandwidth throttling limits in bytes per second, either for all request +types or for reads or writes only. Small values can lead to timeouts or hangs +inside the guest. A safe minimum for disks is 2 MB/s. +@item bps_max=@var{bm},bps_rd_max=@var{rm},bps_wr_max=@var{wm} +Specify bursts in bytes per second, either for all request types or for reads +or writes only. Bursts allow the guest I/O to spike above the limit +temporarily. +@item iops=@var{i},iops_rd=@var{r},iops_wr=@var{w} +Specify request rate limits in requests per second, either for all request +types or for reads or writes only. +@item iops_max=@var{bm},iops_rd_max=@var{rm},iops_wr_max=@var{wm} +Specify bursts in requests per second, either for all request types or for reads +or writes only. Bursts allow the guest I/O to spike above the limit +temporarily. +@item iops_size=@var{is} +Let every @var{is} bytes of a request count as a new request for iops +throttling purposes. 
Use this option to prevent guests from circumventing iops +limits by sending fewer but larger requests. +@item group=@var{g} +Join a throttling quota group with given name @var{g}. All drives that are +members of the same group are accounted for together. Use this option to +prevent guests from circumventing throttling limits by using many small disks +instead of a single larger disk. @end table By default, the @option{cache=writeback} mode is used. It will report data diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 73d93eb5ce..915df9ed90 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -999,7 +999,9 @@ static void build_guest_fsinfo_for_virtual_device(char const *syspath, dirpath = g_strdup_printf("%s/slaves", syspath); dir = opendir(dirpath); if (!dir) { - error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath); + if (errno != ENOENT) { + error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath); + } g_free(dirpath); return; } diff --git a/qga/commands-win32.c b/qga/commands-win32.c index 19d72b2411..04026eedbf 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -768,7 +768,7 @@ int64_t qmp_guest_fsfreeze_freeze(Error **errp) /* cannot risk guest agent blocking itself on a write in this state */ ga_set_frozen(ga_state); - qga_vss_fsfreeze(&i, &local_err, true); + qga_vss_fsfreeze(&i, true, &local_err); if (local_err) { error_propagate(errp, local_err); goto error; @@ -807,7 +807,7 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp) return 0; } - qga_vss_fsfreeze(&i, errp, false); + qga_vss_fsfreeze(&i, false, errp); ga_unset_frozen(ga_state); return i; diff --git a/qga/vss-win32.c b/qga/vss-win32.c index 9a0e46356a..a80933c98b 100644 --- a/qga/vss-win32.c +++ b/qga/vss-win32.c @@ -145,7 +145,7 @@ void ga_uninstall_vss_provider(void) } /* Call VSS requester and freeze/thaw filesystems and applications */ -void qga_vss_fsfreeze(int *nr_volume, Error **errp, bool freeze) +void qga_vss_fsfreeze(int *nr_volume, bool freeze, 
Error **errp) { const char *func_name = freeze ? "requester_freeze" : "requester_thaw"; QGAVSSRequesterFunc func; diff --git a/qga/vss-win32.h b/qga/vss-win32.h index 4d1d15081e..51d303a8f6 100644 --- a/qga/vss-win32.h +++ b/qga/vss-win32.h @@ -21,6 +21,6 @@ bool vss_initialized(void); int ga_install_vss_provider(void); void ga_uninstall_vss_provider(void); -void qga_vss_fsfreeze(int *nr_volume, Error **errp, bool freeze); +void qga_vss_fsfreeze(int *nr_volume, bool freeze, Error **errp); #endif @@ -207,6 +207,12 @@ void qmp_cont(Error **errp) } } + blk_resume_after_migration(&local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + if (runstate_check(RUN_STATE_INMIGRATE)) { autostart = 1; } else { @@ -542,11 +548,6 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename, return NULL; } - if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) { - error_setg(errp, "Can't list properties of device '%s'", typename); - return NULL; - } - obj = object_new(typename); object_property_iter_init(&iter, obj); diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index 03a95c3276..d4253a88de 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -114,7 +114,7 @@ Object *user_creatable_add_opts(QemuOpts *opts, Error **errp) QDict *pdict; Object *obj; const char *id = qemu_opts_id(opts); - const char *type = qemu_opt_get(opts, "qom-type"); + char *type = qemu_opt_get_del(opts, "qom-type"); if (!type) { error_setg(errp, QERR_MISSING_PARAMETER, "qom-type"); @@ -122,17 +122,21 @@ Object *user_creatable_add_opts(QemuOpts *opts, Error **errp) } if (!id) { error_setg(errp, QERR_MISSING_PARAMETER, "id"); + qemu_opt_set(opts, "qom-type", type, &error_abort); + g_free(type); return NULL; } + qemu_opts_set_id(opts, NULL); pdict = qemu_opts_to_qdict(opts, NULL); - qdict_del(pdict, "qom-type"); - qdict_del(pdict, "id"); v = opts_visitor_new(opts); obj = user_creatable_add_type(type, id, pdict, v, errp); 
visit_free(v); + qemu_opts_set_id(opts, (char *) id); + qemu_opt_set(opts, "qom-type", type, &error_abort); + g_free(type); QDECREF(pdict); return obj; } diff --git a/replay/replay-internal.c b/replay/replay-internal.c index bea7b4aa6b..fca8514012 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -195,6 +195,10 @@ void replay_save_instructions(void) if (replay_file && replay_mode == REPLAY_MODE_RECORD) { replay_mutex_lock(); int diff = (int)(replay_get_current_step() - replay_state.current_step); + + /* Time can only go forward */ + assert(diff >= 0); + if (diff > 0) { replay_put_event(EVENT_INSTRUCTION); replay_put_dword(diff); diff --git a/replay/replay.c b/replay/replay.c index 78e2a7e570..f810628cac 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -22,7 +22,7 @@ /* Current version of the replay mechanism. Increase it when file format changes. */ -#define REPLAY_VERSION 0xe02005 +#define REPLAY_VERSION 0xe02006 /* Size of replay log header */ #define HEADER_SIZE (sizeof(uint32_t) + sizeof(uint64_t)) @@ -84,6 +84,10 @@ void replay_account_executed_instructions(void) if (replay_state.instructions_count > 0) { int count = (int)(replay_get_current_step() - replay_state.current_step); + + /* Time can only go forward */ + assert(count >= 0); + replay_state.instructions_count -= count; replay_state.current_step += count; if (replay_state.instructions_count == 0) { diff --git a/roms/openbios b/roms/openbios -Subproject f233c3f72cfa79c1123a7ccef08d2f7e228da6d +Subproject 04898e8ce4c2f7bd94c7eeff9d26f2ff23aae8d diff --git a/scripts/qapi.py b/scripts/qapi.py index e88c047c2e..6c4d554165 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -106,7 +106,6 @@ class QAPIDoc(object): self.name = name # the list of lines for this section self.content = [] - self.optional = False def append(self, line): self.content.append(line) diff --git a/scripts/qapi2texi.py b/scripts/qapi2texi.py index 8eed11a60c..9e015002ef 100755 --- a/scripts/qapi2texi.py +++ 
b/scripts/qapi2texi.py @@ -35,12 +35,12 @@ EXAMPLE_FMT = """@example def subst_strong(doc): """Replaces *foo* by @strong{foo}""" - return re.sub(r'\*([^*\n]+)\*', r'@emph{\1}', doc) + return re.sub(r'\*([^*\n]+)\*', r'@strong{\1}', doc) def subst_emph(doc): """Replaces _foo_ by @emph{foo}""" - return re.sub(r'\b_([^_\n]+)_\b', r' @emph{\1} ', doc) + return re.sub(r'\b_([^_\n]+)_\b', r'@emph{\1}', doc) def subst_vars(doc): @@ -292,6 +292,7 @@ def main(argv): if not qapi.doc_required: print >>sys.stderr, ("%s: need pragma 'doc-required' " "to generate documentation" % argv[0]) + sys.exit(1) print texi_schema(schema) diff --git a/scripts/qemugdb/mtree.py b/scripts/qemugdb/mtree.py index cc8131c2e7..e6791b7885 100644 --- a/scripts/qemugdb/mtree.py +++ b/scripts/qemugdb/mtree.py @@ -21,7 +21,15 @@ def isnull(ptr): return ptr == gdb.Value(0).cast(ptr.type) def int128(p): - return int(p['lo']) + (int(p['hi']) << 64) + '''Read an Int128 type to a python integer. + + QEMU can be built with native Int128 support so we need to detect + if the value is a structure or the native type. + ''' + if p.type.code == gdb.TYPE_CODE_STRUCT: + return int(p['lo']) + (int(p['hi']) << 64) + else: + return int(("%s" % p), 16) class MtreeCommand(gdb.Command): '''Display the memory tree hierarchy''' @@ -69,7 +77,7 @@ class MtreeCommand(gdb.Command): gdb.write('%s alias: %s@%016x (@ %s)\n' % (' ' * level, alias['name'].string(), - ptr['alias_offset'], + int(ptr['alias_offset']), alias, ), gdb.STDOUT) diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 4c990047b6..d60b3a08f7 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -116,7 +116,28 @@ class Analyzer(object): is invoked. If a method matching a trace event name exists, it is invoked to process - that trace record. Otherwise the catchall() method is invoked.""" + that trace record. Otherwise the catchall() method is invoked. 
+ + Example: + The following method handles the runstate_set(int new_state) trace event:: + + def runstate_set(self, new_state): + ... + + The method can also take a timestamp argument before the trace event + arguments:: + + def runstate_set(self, timestamp, new_state): + ... + + Timestamps have the uint64_t type and are in nanoseconds. + + The pid can be included in addition to the timestamp and is useful when + dealing with traces from multiple processes:: + + def runstate_set(self, timestamp, pid, new_state): + ... + """ def begin(self): """Called at the start of the trace.""" diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py index 7ddc4a52ce..db55f52eb5 100644 --- a/scripts/tracetool/format/tcg_h.py +++ b/scripts/tracetool/format/tcg_h.py @@ -40,6 +40,7 @@ def generate(events, backend, group): '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(), '', '#include "exec/helper-proto.h"', + '#include "%s"' % header, '', ) diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py index 7dccd8c5ec..ec7acbe347 100644 --- a/scripts/tracetool/format/tcg_helper_c.py +++ b/scripts/tracetool/format/tcg_helper_c.py @@ -55,6 +55,7 @@ def generate(events, backend, group): '#include "qemu-common.h"', '#include "cpu.h"', '#include "exec/helper-proto.h"', + '#include "%s"' % header, '', ) diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs index 1baa1f1c7c..28049b03cd 100644 --- a/slirp/Makefile.objs +++ b/slirp/Makefile.objs @@ -2,4 +2,4 @@ common-obj-y = cksum.o if.o ip_icmp.o ip6_icmp.o ip6_input.o ip6_output.o \ ip_input.o ip_output.o dnssearch.o dhcpv6.o common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o common-obj-y += tcp_subr.o tcp_timer.o udp.o udp6.o bootp.o tftp.o arp_table.o \ - ndp_table.o + ndp_table.o ncsi.o diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c index 298a48dd25..777eb574be 100644 --- a/slirp/ip6_icmp.c +++ b/slirp/ip6_icmp.c @@ -143,17 +143,13 
@@ void ndp_send_ra(Slirp *slirp) /* Build IPv6 packet */ struct mbuf *t = m_get(slirp); struct ip6 *rip = mtod(t, struct ip6 *); + size_t pl_size = 0; + struct in6_addr addr; + uint32_t scope_id; + rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; rip->ip_nh = IPPROTO_ICMPV6; - rip->ip_pl = htons(ICMP6_NDP_RA_MINLEN - + NDPOPT_LINKLAYER_LEN - + NDPOPT_PREFIXINFO_LEN -#ifndef _WIN32 - + NDPOPT_RDNSS_LEN -#endif - ); - t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); /* Build ICMPv6 packet */ t->m_data += sizeof(struct ip6); @@ -171,6 +167,7 @@ void ndp_send_ra(Slirp *slirp) ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); t->m_data += ICMP6_NDP_RA_MINLEN; + pl_size += ICMP6_NDP_RA_MINLEN; /* Source link-layer address (NDP option) */ struct ndpopt *opt = mtod(t, struct ndpopt *); @@ -178,6 +175,7 @@ void ndp_send_ra(Slirp *slirp) opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); t->m_data += NDPOPT_LINKLAYER_LEN; + pl_size += NDPOPT_LINKLAYER_LEN; /* Prefix information (NDP option) */ struct ndpopt *opt2 = mtod(t, struct ndpopt *); @@ -192,27 +190,26 @@ void ndp_send_ra(Slirp *slirp) opt2->ndpopt_prefixinfo.reserved2 = 0; opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; t->m_data += NDPOPT_PREFIXINFO_LEN; + pl_size += NDPOPT_PREFIXINFO_LEN; -#ifndef _WIN32 /* Prefix information (NDP option) */ - /* disabled for windows for now, until get_dns6_addr is implemented */ - struct ndpopt *opt3 = mtod(t, struct ndpopt *); - opt3->ndpopt_type = NDPOPT_RDNSS; - opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; - opt3->ndpopt_rdnss.reserved = 0; - opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); - opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; - t->m_data += NDPOPT_RDNSS_LEN; -#endif + if (get_dns6_addr(&addr, &scope_id) >= 0) { + /* Host system does have an IPv6 DNS server, announce our 
proxy. */ + struct ndpopt *opt3 = mtod(t, struct ndpopt *); + opt3->ndpopt_type = NDPOPT_RDNSS; + opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; + opt3->ndpopt_rdnss.reserved = 0; + opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); + opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; + t->m_data += NDPOPT_RDNSS_LEN; + pl_size += NDPOPT_RDNSS_LEN; + } + + rip->ip_pl = htons(pl_size); + t->m_data -= sizeof(struct ip6) + pl_size; + t->m_len = sizeof(struct ip6) + pl_size; /* ICMPv6 Checksum */ -#ifndef _WIN32 - t->m_data -= NDPOPT_RDNSS_LEN; -#endif - t->m_data -= NDPOPT_PREFIXINFO_LEN; - t->m_data -= NDPOPT_LINKLAYER_LEN; - t->m_data -= ICMP6_NDP_RA_MINLEN; - t->m_data -= sizeof(struct ip6); ricmp->icmp6_cksum = ip6_cksum(t); ip6_output(NULL, t, 0); diff --git a/slirp/ncsi-pkt.h b/slirp/ncsi-pkt.h new file mode 100644 index 0000000000..ea07d1cd0f --- /dev/null +++ b/slirp/ncsi-pkt.h @@ -0,0 +1,419 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef NCSI_PKT_H +#define NCSI_PKT_H + +/* from linux/net/ncsi/ncsi-pkt.h */ +#define __be32 uint32_t +#define __be16 uint16_t + +struct ncsi_pkt_hdr { + unsigned char mc_id; /* Management controller ID */ + unsigned char revision; /* NCSI version - 0x01 */ + unsigned char reserved; /* Reserved */ + unsigned char id; /* Packet sequence number */ + unsigned char type; /* Packet type */ + unsigned char channel; /* Network controller ID */ + __be16 length; /* Payload length */ + __be32 reserved1[2]; /* Reserved */ +}; + +struct ncsi_cmd_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ +}; + +struct ncsi_rsp_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + __be16 code; /* Response code */ + __be16 reason; /* Response reason */ +}; + +struct ncsi_aen_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + unsigned char reserved2[3]; /* Reserved */ + unsigned char type; /* AEN packet type */ +}; + +/* NCSI common command packet */ +struct ncsi_cmd_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 checksum; /* Checksum */ + unsigned char pad[26]; +}; + +struct ncsi_rsp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Select Package */ +struct ncsi_cmd_sp_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char hw_arbitration; /* HW arbitration */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Disable Channel */ +struct ncsi_cmd_dc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char ald; /* Allow link down */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Reset Channel */ +struct ncsi_cmd_rc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 reserved; /* Reserved */ + __be32 checksum; /* Checksum */ + unsigned char 
pad[22]; +}; + +/* AEN Enable */ +struct ncsi_cmd_ae_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mc_id; /* MC ID */ + __be32 mode; /* AEN working mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set Link */ +struct ncsi_cmd_sl_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Link working mode */ + __be32 oem_mode; /* OEM link mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set VLAN Filter */ +struct ncsi_cmd_svf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be16 reserved; /* Reserved */ + __be16 vlan; /* VLAN ID */ + __be16 reserved1; /* Reserved */ + unsigned char index; /* VLAN table index */ + unsigned char enable; /* Enable or disable */ + __be32 checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* Enable VLAN */ +struct ncsi_cmd_ev_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mode; /* VLAN filter mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Set MAC Address */ +struct ncsi_cmd_sma_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char mac[6]; /* MAC address */ + unsigned char index; /* MAC table index */ + unsigned char at_e; /* Addr type and operation */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Enable Broadcast Filter */ +struct ncsi_cmd_ebf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Filter mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Enable Global Multicast Filter */ +struct ncsi_cmd_egmf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Global MC mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Set NCSI Flow Control */ +struct ncsi_cmd_snfc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char 
reserved[3]; /* Reserved */ + unsigned char mode; /* Flow control mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Get Link Status */ +struct ncsi_rsp_gls_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Link status */ + __be32 other; /* Other indications */ + __be32 oem_status; /* OEM link status */ + __be32 checksum; + unsigned char pad[10]; +}; + +/* Get Version ID */ +struct ncsi_rsp_gvi_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 ncsi_version; /* NCSI version */ + unsigned char reserved[3]; /* Reserved */ + unsigned char alpha2; /* NCSI version */ + unsigned char fw_name[12]; /* f/w name string */ + __be32 fw_version; /* f/w version */ + __be16 pci_ids[4]; /* PCI IDs */ + __be32 mf_id; /* Manufacture ID */ + __be32 checksum; +}; + +/* Get Capabilities */ +struct ncsi_rsp_gc_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cap; /* Capabilities */ + __be32 bc_cap; /* Broadcast cap */ + __be32 mc_cap; /* Multicast cap */ + __be32 buf_cap; /* Buffering cap */ + __be32 aen_cap; /* AEN cap */ + unsigned char vlan_cnt; /* VLAN filter count */ + unsigned char mixed_cnt; /* Mix filter count */ + unsigned char mc_cnt; /* MC filter count */ + unsigned char uc_cnt; /* UC filter count */ + unsigned char reserved[2]; /* Reserved */ + unsigned char vlan_mode; /* VLAN mode */ + unsigned char channel_cnt; /* Channel count */ + __be32 checksum; /* Checksum */ +}; + +/* Get Parameters */ +struct ncsi_rsp_gp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char mac_cnt; /* Number of MAC addr */ + unsigned char reserved[2]; /* Reserved */ + unsigned char mac_enable; /* MAC addr enable flags */ + unsigned char vlan_cnt; /* VLAN tag count */ + unsigned char reserved1; /* Reserved */ + __be16 vlan_enable; /* VLAN tag enable flags */ + __be32 link_mode; /* Link setting */ + __be32 bc_mode; /* BC filter mode */ + __be32 valid_modes; /* Valid mode parameters */ + 
unsigned char vlan_mode; /* VLAN mode */ + unsigned char fc_mode; /* Flow control mode */ + unsigned char reserved2[2]; /* Reserved */ + __be32 aen_mode; /* AEN mode */ + unsigned char mac[6]; /* Supported MAC addr */ + __be16 vlan; /* Supported VLAN tags */ + __be32 checksum; /* Checksum */ +}; + +/* Get Controller Packet Statistics */ +struct ncsi_rsp_gcps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cnt_hi; /* Counter cleared */ + __be32 cnt_lo; /* Counter cleared */ + __be32 rx_bytes; /* Rx bytes */ + __be32 tx_bytes; /* Tx bytes */ + __be32 rx_uc_pkts; /* Rx UC packets */ + __be32 rx_mc_pkts; /* Rx MC packets */ + __be32 rx_bc_pkts; /* Rx BC packets */ + __be32 tx_uc_pkts; /* Tx UC packets */ + __be32 tx_mc_pkts; /* Tx MC packets */ + __be32 tx_bc_pkts; /* Tx BC packets */ + __be32 fcs_err; /* FCS errors */ + __be32 align_err; /* Alignment errors */ + __be32 false_carrier; /* False carrier detection */ + __be32 runt_pkts; /* Rx runt packets */ + __be32 jabber_pkts; /* Rx jabber packets */ + __be32 rx_pause_xon; /* Rx pause XON frames */ + __be32 rx_pause_xoff; /* Rx XOFF frames */ + __be32 tx_pause_xon; /* Tx XON frames */ + __be32 tx_pause_xoff; /* Tx XOFF frames */ + __be32 tx_s_collision; /* Single collision frames */ + __be32 tx_m_collision; /* Multiple collision frames */ + __be32 l_collision; /* Late collision frames */ + __be32 e_collision; /* Excessive collision frames */ + __be32 rx_ctl_frames; /* Rx control frames */ + __be32 rx_64_frames; /* Rx 64-bytes frames */ + __be32 rx_127_frames; /* Rx 65-127 bytes frames */ + __be32 rx_255_frames; /* Rx 128-255 bytes frames */ + __be32 rx_511_frames; /* Rx 256-511 bytes frames */ + __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ + __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */ + __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ + __be32 tx_64_frames; /* Tx 64-bytes frames */ + __be32 tx_127_frames; /* Tx 65-127 bytes frames */ + __be32 tx_255_frames; /* Tx 128-255 
bytes frames */ + __be32 tx_511_frames; /* Tx 256-511 bytes frames */ + __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ + __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ + __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ + __be32 rx_valid_bytes; /* Rx valid bytes */ + __be32 rx_runt_pkts; /* Rx error runt packets */ + __be32 rx_jabber_pkts; /* Rx error jabber packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Statistics */ +struct ncsi_rsp_gns_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 rx_cmds; /* Rx NCSI commands */ + __be32 dropped_cmds; /* Dropped commands */ + __be32 cmd_type_errs; /* Command type errors */ + __be32 cmd_csum_errs; /* Command checksum errors */ + __be32 rx_pkts; /* Rx NCSI packets */ + __be32 tx_pkts; /* Tx NCSI packets */ + __be32 tx_aen_pkts; /* Tx AEN packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Pass-through Statistics */ +struct ncsi_rsp_gnpts_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 tx_pkts; /* Tx packets */ + __be32 tx_dropped; /* Tx dropped packets */ + __be32 tx_channel_err; /* Tx channel errors */ + __be32 tx_us_err; /* Tx undersize errors */ + __be32 rx_pkts; /* Rx packets */ + __be32 rx_dropped; /* Rx dropped packets */ + __be32 rx_channel_err; /* Rx channel errors */ + __be32 rx_us_err; /* Rx undersize errors */ + __be32 rx_os_err; /* Rx oversize errors */ + __be32 checksum; /* Checksum */ +}; + +/* Get package status */ +struct ncsi_rsp_gps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Hardware arbitration status */ + __be32 checksum; +}; + +/* Get package UUID */ +struct ncsi_rsp_gpuuid_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char uuid[16]; /* UUID */ + __be32 checksum; +}; + +/* AEN: Link State Change */ +struct ncsi_aen_lsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Link status */ + __be32 oem_status; /* OEM link status */ + __be32 
checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* AEN: Configuration Required */ +struct ncsi_aen_cr_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* AEN: Host Network Controller Driver Status Change */ +struct ncsi_aen_hncdsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Status */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* NCSI packet revision */ +#define NCSI_PKT_REVISION 0x01 + +/* NCSI packet commands */ +#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ +#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ +#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ +#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ +#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ +#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ +#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ +#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ +#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ +#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ +#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ +#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ +#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ +#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ +#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ +#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ +#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ +#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ +#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ +#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */ +#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ +#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ +#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ +#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ +#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ +#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu 
Statistics */ +#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ +#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ +#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ +#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ + +/* NCSI packet responses */ +#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) +#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) +#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) +#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) +#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) +#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) +#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) +#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) +#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) +#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) +#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) +#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) +#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) +#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) +#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) +#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) +#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) +#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) +#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) +#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) +#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) +#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) +#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) +#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) +#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) +#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80) +#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) +#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) +#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) +#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) + +/* NCSI response code/reason */ +#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ +#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ +#define 
NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ +#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ +#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ +#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ +#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ +#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ +#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ +#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ +#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ + +/* NCSI AEN packet type */ +#define NCSI_PKT_AEN 0xFF /* AEN Packet */ +#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ +#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ +#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ + +#endif /* NCSI_PKT_H */ diff --git a/slirp/ncsi.c b/slirp/ncsi.c new file mode 100644 index 0000000000..d12ba3e494 --- /dev/null +++ b/slirp/ncsi.c @@ -0,0 +1,130 @@ +/* + * NC-SI (Network Controller Sideband Interface) "echo" model + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "slirp.h" + +#include "ncsi-pkt.h" + +/* Get Capabilities */ +static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) +{ + struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *) rnh; + + rsp->cap = htonl(~0); + rsp->bc_cap = htonl(~0); + rsp->mc_cap = htonl(~0); + rsp->buf_cap = htonl(~0); + rsp->aen_cap = htonl(~0); + rsp->vlan_mode = 0xff; + rsp->uc_cnt = 2; + return 0; +} + +/* Get Link status */ +static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) +{ + struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *) rnh; + + rsp->status = htonl(0x1); + return 0; +} + +static const struct ncsi_rsp_handler { + unsigned char type; + int payload; + int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); +} ncsi_rsp_handlers[] = { + { NCSI_PKT_RSP_CIS, 4, NULL }, + { NCSI_PKT_RSP_SP, 4, NULL }, + { NCSI_PKT_RSP_DP, 4, NULL }, + { NCSI_PKT_RSP_EC, 4, NULL }, + { NCSI_PKT_RSP_DC, 4, NULL }, + { NCSI_PKT_RSP_RC, 4, NULL }, + { NCSI_PKT_RSP_ECNT, 4, NULL }, + { NCSI_PKT_RSP_DCNT, 4, NULL }, + { NCSI_PKT_RSP_AE, 4, NULL }, + { NCSI_PKT_RSP_SL, 4, NULL }, + { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, + { NCSI_PKT_RSP_SVF, 4, NULL }, + { NCSI_PKT_RSP_EV, 4, NULL }, + { NCSI_PKT_RSP_DV, 4, NULL }, + { NCSI_PKT_RSP_SMA, 4, NULL }, + { NCSI_PKT_RSP_EBF, 4, NULL }, + { NCSI_PKT_RSP_DBF, 4, NULL }, + { NCSI_PKT_RSP_EGMF, 4, NULL }, + { NCSI_PKT_RSP_DGMF, 4, NULL }, + { NCSI_PKT_RSP_SNFC, 4, NULL }, + { NCSI_PKT_RSP_GVI, 36, NULL }, + { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, + { NCSI_PKT_RSP_GP, -1, NULL }, + { NCSI_PKT_RSP_GCPS, 172, NULL }, + { NCSI_PKT_RSP_GNS, 172, NULL }, + { NCSI_PKT_RSP_GNPTS, 172, NULL }, + { NCSI_PKT_RSP_GPS, 8, NULL }, + { NCSI_PKT_RSP_OEM, 0, NULL }, + { NCSI_PKT_RSP_PLDM, 0, NULL }, + { NCSI_PKT_RSP_GPUUID, 20, NULL } +}; + +/* + * packet format : ncsi header + payload + checksum + */ +#define NCSI_MAX_PAYLOAD 172 +#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) + +void 
ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); + uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; + struct ethhdr *reh = (struct ethhdr *)ncsi_reply; + struct ncsi_rsp_pkt_hdr *rnh = (struct ncsi_rsp_pkt_hdr *) + (ncsi_reply + ETH_HLEN); + const struct ncsi_rsp_handler *handler = NULL; + int i; + + memset(ncsi_reply, 0, sizeof(ncsi_reply)); + + memset(reh->h_dest, 0xff, ETH_ALEN); + memset(reh->h_source, 0xff, ETH_ALEN); + reh->h_proto = htons(ETH_P_NCSI); + + for (i = 0; i < ARRAY_SIZE(ncsi_rsp_handlers); i++) { + if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { + handler = &ncsi_rsp_handlers[i]; + break; + } + } + + rnh->common.mc_id = nh->mc_id; + rnh->common.revision = NCSI_PKT_REVISION; + rnh->common.id = nh->id; + rnh->common.type = nh->type + 0x80; + rnh->common.channel = nh->channel; + + if (handler) { + rnh->common.length = htons(handler->payload); + rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); + rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); + + if (handler->handler) { + /* TODO: handle errors */ + handler->handler(rnh); + } + } else { + rnh->common.length = 0; + rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); + rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); + } + + /* TODO: add a checksum at the end of the frame but the specs + * allows it to be zero */ + + slirp_output(slirp->opaque, ncsi_reply, ETH_HLEN + sizeof(*nh) + + (handler ? 
handler->payload : 0) + 4); +} diff --git a/slirp/slirp.c b/slirp/slirp.c index 60539de7a3..9a50918346 100644 --- a/slirp/slirp.c +++ b/slirp/slirp.c @@ -198,7 +198,7 @@ static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, #ifdef DEBUG else { char s[INET6_ADDRSTRLEN]; - char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); + const char *res = inet_ntop(af, tmp_addr, s, sizeof(s)); if (!res) { res = "(string conversion error)"; } @@ -870,6 +870,10 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) } break; + case ETH_P_NCSI: + ncsi_input(slirp, pkt, pkt_len); + break; + default: break; } diff --git a/slirp/slirp.h b/slirp/slirp.h index 3877f667f0..5af4f482b5 100644 --- a/slirp/slirp.h +++ b/slirp/slirp.h @@ -231,6 +231,9 @@ extern Slirp *slirp_instance; void if_start(Slirp *); +/* ncsi.c */ +void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); + #ifndef _WIN32 #include <netdb.h> #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 04b062cb7e..b357aee778 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -304,33 +304,6 @@ bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) -static void arm_v7m_unassigned_access(CPUState *cpu, hwaddr addr, - bool is_write, bool is_exec, int opaque, - unsigned size) -{ - ARMCPU *arm = ARM_CPU(cpu); - CPUARMState *env = &arm->env; - - /* ARMv7-M interrupt return works by loading a magic value into the PC. - * On real hardware the load causes the return to occur. The qemu - * implementation performs the jump normally, then does the exception - * return by throwing a special exception when when the CPU tries to - * execute code at the magic address. 
- */ - if (env->v7m.exception != 0 && addr >= 0xfffffff0 && is_exec) { - cpu->exception_index = EXCP_EXCEPTION_EXIT; - cpu_loop_exit(cpu); - } - - /* In real hardware an attempt to access parts of the address space - * with nothing there will usually cause an external abort. - * However our QEMU board models are often missing device models where - * the guest can boot anyway with the default read-as-zero/writes-ignored - * behaviour that you get without a QEMU unassigned_access hook. - * So just return here to retain that default behaviour. - */ -} - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { CPUClass *cc = CPU_GET_CLASS(cs); @@ -338,17 +311,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUARMState *env = &cpu->env; bool ret = false; - /* ARMv7-M interrupt return works by loading a magic value - * into the PC. On real hardware the load causes the - * return to occur. The qemu implementation performs the - * jump normally, then does the exception return when the - * CPU tries to execute code at the magic address. - * This will cause the magic PC value to be pushed to - * the stack if an interrupt occurred at the wrong time. - * We avoid this by disabling interrupts when - * pc contains a magic address. - * - * ARMv7-M interrupt masking works differently than -A or -R. + /* ARMv7-M interrupt masking works differently than -A or -R. * There is no FIQ/IRQ distinction. Instead of I and F bits * masking FIQ and IRQ interrupts, an exception is taken only * if it is higher priority than the current execution priority @@ -356,8 +319,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) * currently active exception). 
*/ if (interrupt_request & CPU_INTERRUPT_HARD - && (armv7m_nvic_can_take_pending_exception(env->nvic)) - && (env->regs[15] < 0xfffffff0)) { + && (armv7m_nvic_can_take_pending_exception(env->nvic))) { cs->exception_index = EXCP_IRQ; cc->do_interrupt(cs); ret = true; @@ -1091,7 +1053,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) cc->do_interrupt = arm_v7m_cpu_do_interrupt; #endif - cc->do_unassigned_access = arm_v7m_unassigned_access; cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt; } diff --git a/target/arm/cpu.h b/target/arm/cpu.h index a8aabce7dd..1055bfef3d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -58,6 +58,7 @@ #define EXCP_SEMIHOST 16 /* semihosting call */ #define EXCP_NOCP 17 /* v7M NOCP UsageFault */ #define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */ +/* NB: add new EXCP_ defines to the array in arm_log_exception() too */ #define ARMV7M_EXCP_RESET 1 #define ARMV7M_EXCP_NMI 2 @@ -2290,6 +2291,9 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) #define ARM_TBFLAG_NS_MASK (1 << ARM_TBFLAG_NS_SHIFT) #define ARM_TBFLAG_BE_DATA_SHIFT 20 #define ARM_TBFLAG_BE_DATA_MASK (1 << ARM_TBFLAG_BE_DATA_SHIFT) +/* For M profile only, Handler (ie not Thread) mode */ +#define ARM_TBFLAG_HANDLER_SHIFT 21 +#define ARM_TBFLAG_HANDLER_MASK (1 << ARM_TBFLAG_HANDLER_SHIFT) /* Bit usage when in AArch64 state */ #define ARM_TBFLAG_TBI0_SHIFT 0 /* TBI0 for EL0/1 or TBI for EL2/3 */ @@ -2326,6 +2330,8 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT) #define ARM_TBFLAG_BE_DATA(F) \ (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT) +#define ARM_TBFLAG_HANDLER(F) \ + (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT) #define ARM_TBFLAG_TBI0(F) \ (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT) #define ARM_TBFLAG_TBI1(F) \ @@ -2516,6 +2522,10 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, } *flags |= 
fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + if (env->v7m.exception != 0) { + *flags |= ARM_TBFLAG_HANDLER_MASK; + } + *cs_base = 0; } diff --git a/target/arm/helper.c b/target/arm/helper.c index 8cb7a9451c..8a3e4480aa 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6271,6 +6271,25 @@ static void arm_log_exception(int idx) { if (qemu_loglevel_mask(CPU_LOG_INT)) { const char *exc = NULL; + static const char * const excnames[] = { + [EXCP_UDEF] = "Undefined Instruction", + [EXCP_SWI] = "SVC", + [EXCP_PREFETCH_ABORT] = "Prefetch Abort", + [EXCP_DATA_ABORT] = "Data Abort", + [EXCP_IRQ] = "IRQ", + [EXCP_FIQ] = "FIQ", + [EXCP_BKPT] = "Breakpoint", + [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit", + [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage", + [EXCP_HVC] = "Hypervisor Call", + [EXCP_HYP_TRAP] = "Hypervisor Trap", + [EXCP_SMC] = "Secure Monitor Call", + [EXCP_VIRQ] = "Virtual IRQ", + [EXCP_VFIQ] = "Virtual FIQ", + [EXCP_SEMIHOST] = "Semihosting call", + [EXCP_NOCP] = "v7M NOCP UsageFault", + [EXCP_INVSTATE] = "v7M INVSTATE UsageFault", + }; if (idx >= 0 && idx < ARRAY_SIZE(excnames)) { exc = excnames[idx]; diff --git a/target/arm/internals.h b/target/arm/internals.h index f742a419ff..1f6efef7c4 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -51,27 +51,6 @@ static inline bool excp_is_internal(int excp) || excp == EXCP_SEMIHOST; } -/* Exception names for debug logging; note that not all of these - * precisely correspond to architectural exceptions. 
- */ -static const char * const excnames[] = { - [EXCP_UDEF] = "Undefined Instruction", - [EXCP_SWI] = "SVC", - [EXCP_PREFETCH_ABORT] = "Prefetch Abort", - [EXCP_DATA_ABORT] = "Data Abort", - [EXCP_IRQ] = "IRQ", - [EXCP_FIQ] = "FIQ", - [EXCP_BKPT] = "Breakpoint", - [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit", - [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage", - [EXCP_HVC] = "Hypervisor Call", - [EXCP_HYP_TRAP] = "Hypervisor Trap", - [EXCP_SMC] = "Secure Monitor Call", - [EXCP_VIRQ] = "Virtual IRQ", - [EXCP_VFIQ] = "Virtual FIQ", - [EXCP_SEMIHOST] = "Semihosting call", -}; - /* Scale factor for generic timers, ie number of ns per tick. * This gives a 62.5MHz timer. */ diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 61111091ad..a16abc8d12 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -940,7 +940,7 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) * single step at this point so something has gone wrong. */ error_report("%s: guest single-step while debugging unsupported" - " (%"PRIx64", %"PRIx32")\n", + " (%"PRIx64", %"PRIx32")", __func__, env->pc, debug_exit->hsr); return false; } @@ -965,7 +965,7 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) break; } default: - error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")\n", + error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")", __func__, debug_exit->hsr, env->pc); } diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index d64c8670fa..156b825040 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -130,7 +130,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, if (unlikely(ret)) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - uint32_t syn, exc; + uint32_t syn, exc, fsc; unsigned int target_el; bool same_el; @@ -145,19 +145,32 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, env->cp15.hpfar_el2 = 
extract64(fi.s2addr, 12, 47) << 4; } same_el = arm_current_el(env) == target_el; - /* AArch64 syndrome does not have an LPAE bit */ - syn = fsr & ~(1 << 9); + + if (fsr & (1 << 9)) { + /* LPAE format fault status register : bottom 6 bits are + * status code in the same form as needed for syndrome + */ + fsc = extract32(fsr, 0, 6); + } else { + /* Short format FSR : this fault will never actually be reported + * to an EL that uses a syndrome register. Check that here, + * and use a (currently) reserved FSR code in case the constructed + * syndrome does leak into the guest somehow. + */ + assert(target_el != 2 && !arm_el_is_aa64(env, target_el)); + fsc = 0x3f; + } /* For insn and data aborts we assume there is no instruction syndrome * information; this is always true for exceptions reported to EL1. */ if (access_type == MMU_INST_FETCH) { - syn = syn_insn_abort(same_el, 0, fi.s1ptw, syn); + syn = syn_insn_abort(same_el, 0, fi.s1ptw, fsc); exc = EXCP_PREFETCH_ABORT; } else { syn = merge_syn_data_abort(env->exception.syndrome, target_el, same_el, fi.s1ptw, - access_type == MMU_DATA_STORE, syn); + access_type == MMU_DATA_STORE, fsc); if (access_type == MMU_DATA_STORE && arm_feature(env, ARM_FEATURE_V6)) { fsr |= (1 << 11); diff --git a/target/arm/translate.c b/target/arm/translate.c index e32e38cadd..0b5a0bca06 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -296,6 +296,30 @@ static void gen_step_complete_exception(DisasContext *s) s->is_jmp = DISAS_EXC; } +static void gen_singlestep_exception(DisasContext *s) +{ + /* Generate the right kind of exception for singlestep, which is + * either the architectural singlestep or EXCP_DEBUG for QEMU's + * gdb singlestepping. 
+ */ + if (s->ss_active) { + gen_step_complete_exception(s); + } else { + gen_exception_internal(EXCP_DEBUG); + } +} + +static inline bool is_singlestepping(DisasContext *s) +{ + /* Return true if we are singlestepping either because of + * architectural singlestep or QEMU gdbstub singlestep. This does + * not include the command line '-singlestep' mode which is rather + * misnamed as it only means "one instruction per TB" and doesn't + * affect the code we generate. + */ + return s->singlestep_enabled || s->ss_active; +} + static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b) { TCGv_i32 tmp1 = tcg_temp_new_i32(); @@ -880,6 +904,21 @@ static const uint8_t table_logic_cc[16] = { 1, /* mvn */ }; +static inline void gen_set_condexec(DisasContext *s) +{ + if (s->condexec_mask) { + uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_movi_i32(tmp, val); + store_cpu_field(tmp, condexec_bits); + } +} + +static inline void gen_set_pc_im(DisasContext *s, target_ulong val) +{ + tcg_gen_movi_i32(cpu_R[15], val); +} + /* Set PC and Thumb state from an immediate address. */ static inline void gen_bx_im(DisasContext *s, uint32_t addr) { @@ -904,6 +943,51 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) store_cpu_field(var, thumb); } +/* Set PC and Thumb state from var. var is marked as dead. + * For M-profile CPUs, include logic to detect exception-return + * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC, + * and BX reg, and no others, and happens only for code in Handler mode. + */ +static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) +{ + /* Generate the same code here as for a simple bx, but flag via + * s->is_jmp that we need to do the rest of the work later. 
+ */ + gen_bx(s, var); + if (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M)) { + s->is_jmp = DISAS_BX_EXCRET; + } +} + +static inline void gen_bx_excret_final_code(DisasContext *s) +{ + /* Generate the code to finish possible exception return and end the TB */ + TCGLabel *excret_label = gen_new_label(); + + /* Is the new PC value in the magic range indicating exception return? */ + tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], 0xff000000, excret_label); + /* No: end the TB as we would for a DISAS_JMP */ + if (is_singlestepping(s)) { + gen_singlestep_exception(s); + } else { + tcg_gen_exit_tb(0); + } + gen_set_label(excret_label); + /* Yes: this is an exception return. + * At this point in runtime env->regs[15] and env->thumb will hold + * the exception-return magic number, which do_v7m_exception_exit() + * will read. Nothing else will be able to see those values because + * the cpu-exec main loop guarantees that we will always go straight + * from raising the exception to the exception-handling code. + * + * gen_ss_advance(s) does nothing on M profile currently but + * calling it is conceptually the right thing as we have executed + * this instruction (compare SWI, HVC, SMC handling). + */ + gen_ss_advance(s); + gen_exception_internal(EXCP_EXCEPTION_EXIT); +} + /* Variant of store_reg which uses branch&exchange logic when storing to r15 in ARM architecture v7 and above. The source must be a temporary and will be marked as dead. 
*/ @@ -923,7 +1007,7 @@ static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var) static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) { if (reg == 15 && ENABLE_ARCH_5) { - gen_bx(s, var); + gen_bx_excret(s, var); } else { store_reg(s, reg, var); } @@ -1056,11 +1140,6 @@ DO_GEN_ST(8, MO_UB) DO_GEN_ST(16, MO_UW) DO_GEN_ST(32, MO_UL) -static inline void gen_set_pc_im(DisasContext *s, target_ulong val) -{ - tcg_gen_movi_i32(cpu_R[15], val); -} - static inline void gen_hvc(DisasContext *s, int imm16) { /* The pre HVC helper handles cases when HVC gets trapped @@ -1094,17 +1173,6 @@ static inline void gen_smc(DisasContext *s) s->is_jmp = DISAS_SMC; } -static inline void -gen_set_condexec (DisasContext *s) -{ - if (s->condexec_mask) { - uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1); - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_movi_i32(tmp, val); - store_cpu_field(tmp, condexec_bits); - } -} - static void gen_exception_internal_insn(DisasContext *s, int offset, int excp) { gen_set_condexec(s); @@ -4092,7 +4160,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) static inline void gen_jmp (DisasContext *s, uint32_t dest) { - if (unlikely(s->singlestep_enabled || s->ss_active)) { + if (unlikely(is_singlestepping(s))) { /* An indirect jump so that we still trigger the debug exception. */ if (s->thumb) dest |= 1; @@ -9858,7 +9926,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw tmp = tcg_temp_new_i32(); gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); if (i == 15) { - gen_bx(s, tmp); + gen_bx_excret(s, tmp); } else if (i == rn) { loaded_var = tmp; loaded_base = 1; @@ -9959,7 +10027,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw gen_arm_shift_reg(tmp, op, tmp2, logic_cc); if (logic_cc) gen_logic_CC(tmp); - store_reg_bx(s, rd, tmp); + store_reg(s, rd, tmp); break; case 1: /* Sign/zero extend. 
*/ op = (insn >> 20) & 7; @@ -10485,7 +10553,12 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw } break; case 4: /* bxj */ - /* Trivial implementation equivalent to bx. */ + /* Trivial implementation equivalent to bx. + * This instruction doesn't exist at all for M-profile. + */ + if (arm_dc_feature(s, ARM_FEATURE_M)) { + goto illegal_op; + } tmp = load_reg(s, rn); gen_bx(s, tmp); break; @@ -10885,7 +10958,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw goto illegal_op; } if (rs == 15) { - gen_bx(s, tmp); + gen_bx_excret(s, tmp); } else { store_reg(s, rs, tmp); } @@ -11075,9 +11148,11 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s) tmp2 = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp2, val); store_reg(s, 14, tmp2); + gen_bx(s, tmp); + } else { + /* Only BX works as exception-return, not BLX */ + gen_bx_excret(s, tmp); } - /* already thumb, no need to check */ - gen_bx(s, tmp); break; } break; @@ -11752,6 +11827,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags); dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags); dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags); + dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(tb->flags); dc->cp_regs = cpu->cp_regs; dc->features = env->features; @@ -11851,14 +11927,6 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) dc->is_jmp = DISAS_EXC; break; } -#else - if (arm_dc_feature(dc, ARM_FEATURE_M)) { - /* Branches to the magic exception-return addresses should - * already have been caught via the arm_v7m_unassigned_access hook, - * and never get here. 
- */ - assert(dc->pc < 0xfffffff0); - } #endif if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { @@ -11953,9 +12021,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc)); } while (!dc->is_jmp && !tcg_op_buf_full() && - !cs->singlestep_enabled && + !is_singlestepping(dc) && !singlestep && - !dc->ss_active && !end_of_page && num_insns < max_insns); @@ -11971,9 +12038,16 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) /* At this stage dc->condjmp will only be set when the skipped instruction was a conditional branch or trap, and the PC has already been written. */ - if (unlikely(cs->singlestep_enabled || dc->ss_active)) { + gen_set_condexec(dc); + if (dc->is_jmp == DISAS_BX_EXCRET) { + /* Exception return branches need some special case code at the + * end of the TB, which is complex enough that it has to + * handle the single-step vs not and the condition-failed + * insn codepath itself. + */ + gen_bx_excret_final_code(dc); + } else if (unlikely(is_singlestepping(dc))) { /* Unconditional and "condition passed" instruction codepath. */ - gen_set_condexec(dc); switch (dc->is_jmp) { case DISAS_SWI: gen_ss_advance(dc); @@ -11993,24 +12067,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_set_pc_im(dc, dc->pc); /* fall through */ default: - if (dc->ss_active) { - gen_step_complete_exception(dc); - } else { - /* FIXME: Single stepping a WFI insn will not halt - the CPU. */ - gen_exception_internal(EXCP_DEBUG); - } - } - if (dc->condjmp) { - /* "Condition failed" instruction codepath. */ - gen_set_label(dc->condlabel); - gen_set_condexec(dc); - gen_set_pc_im(dc, dc->pc); - if (dc->ss_active) { - gen_step_complete_exception(dc); - } else { - gen_exception_internal(EXCP_DEBUG); - } + /* FIXME: Single stepping a WFI insn will not halt the CPU. 
*/ + gen_singlestep_exception(dc); } } else { /* While branches must always occur at the end of an IT block, @@ -12021,7 +12079,6 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) - Hardware watchpoints. Hardware breakpoints have already been handled and skip this code. */ - gen_set_condexec(dc); switch(dc->is_jmp) { case DISAS_NEXT: gen_goto_tb(dc, 1, dc->pc); @@ -12061,11 +12118,17 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_exception(EXCP_SMC, syn_aa32_smc(), 3); break; } - if (dc->condjmp) { - gen_set_label(dc->condlabel); - gen_set_condexec(dc); + } + + if (dc->condjmp) { + /* "Condition failed" instruction codepath for the branch/trap insn */ + gen_set_label(dc->condlabel); + gen_set_condexec(dc); + if (unlikely(is_singlestepping(dc))) { + gen_set_pc_im(dc, dc->pc); + gen_singlestep_exception(dc); + } else { gen_goto_tb(dc, 1, dc->pc); - dc->condjmp = 0; } } diff --git a/target/arm/translate.h b/target/arm/translate.h index abb0760158..629dab945e 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -31,6 +31,7 @@ typedef struct DisasContext { bool vfp_enabled; /* FP enabled via FPSCR.EN */ int vec_len; int vec_stride; + bool v7m_handler_mode; /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI * so that top level loop can generate correct syndrome information. 
*/ @@ -134,6 +135,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn) #define DISAS_HVC 8 #define DISAS_SMC 9 #define DISAS_YIELD 10 +/* M profile branch which might be an exception return (and so needs + * custom end-of-TB code) + */ +#define DISAS_BX_EXCRET 11 #ifdef TARGET_AARCH64 void a64_translate_init(void); diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c index 5a2e4be5d0..fe0aa36932 100644 --- a/target/i386/arch_dump.c +++ b/target/i386/arch_dump.c @@ -391,8 +391,7 @@ int cpu_get_dump_info(ArchDumpInfo *info, #ifdef TARGET_X86_64 X86CPU *first_x86_cpu = X86_CPU(first_cpu); - - lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); + lma = first_cpu && (first_x86_cpu->env.hflags & HF_LMA_MASK); #endif if (lma) { diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7aa762245a..13c0985f11 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3373,15 +3373,19 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) GList *l; Error *local_err = NULL; - /*TODO: cpu->max_features incorrectly overwrites features - * set using "feat=on|off". Once we fix this, we can convert + /*TODO: Now cpu->max_features doesn't overwrite features + * set using QOM properties, and we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ if (cpu->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] = - x86_cpu_get_supported_feature_word(w, cpu->migratable); + /* Override only features that weren't set explicitly + * by the user. 
+ */ + env->features[w] |= + x86_cpu_get_supported_feature_word(w, cpu->migratable) & + ~env->user_features[w]; } } @@ -3692,15 +3696,17 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) } typedef struct BitProperty { - uint32_t *ptr; + FeatureWord w; uint32_t mask; } BitProperty; static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { + X86CPU *cpu = X86_CPU(obj); BitProperty *fp = opaque; - bool value = (*fp->ptr & fp->mask) == fp->mask; + uint32_t f = cpu->env.features[fp->w]; + bool value = (f & fp->mask) == fp->mask; visit_type_bool(v, name, &value, errp); } @@ -3708,6 +3714,7 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { DeviceState *dev = DEVICE(obj); + X86CPU *cpu = X86_CPU(obj); BitProperty *fp = opaque; Error *local_err = NULL; bool value; @@ -3724,10 +3731,11 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, } if (value) { - *fp->ptr |= fp->mask; + cpu->env.features[fp->w] |= fp->mask; } else { - *fp->ptr &= ~fp->mask; + cpu->env.features[fp->w] &= ~fp->mask; } + cpu->env.user_features[fp->w] |= fp->mask; } static void x86_cpu_release_bit_prop(Object *obj, const char *name, @@ -3745,7 +3753,7 @@ static void x86_cpu_release_bit_prop(Object *obj, const char *name, */ static void x86_cpu_register_bit_prop(X86CPU *cpu, const char *prop_name, - uint32_t *field, + FeatureWord w, int bitnr) { BitProperty *fp; @@ -3755,11 +3763,11 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu, op = object_property_find(OBJECT(cpu), prop_name, NULL); if (op) { fp = op->opaque; - assert(fp->ptr == field); + assert(fp->w == w); fp->mask |= mask; } else { fp = g_new0(BitProperty, 1); - fp->ptr = field; + fp->w = w; fp->mask = mask; object_property_add(OBJECT(cpu), prop_name, "bool", x86_cpu_get_bit_prop, @@ -3787,7 +3795,7 @@ static void x86_cpu_register_feature_bit_props(X86CPU *cpu, /* aliases don't use "|" delimiters 
anymore, they are registered * manually using object_property_add_alias() */ assert(!strchr(name, '|')); - x86_cpu_register_bit_prop(cpu, name, &cpu->env.features[w], bitnr); + x86_cpu_register_bit_prop(cpu, name, w, bitnr); } static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 07401ad9fe..c4602ca80d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1147,6 +1147,8 @@ typedef struct CPUX86State { uint32_t cpuid_vendor3; uint32_t cpuid_version; FeatureWordArray features; + /* Features that were explicitly enabled/disabled */ + FeatureWordArray user_features; uint32_t cpuid_model[12]; /* MTRRs */ diff --git a/target/i386/helper.c b/target/i386/helper.c index e2af3404f2..f11cac63a1 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -326,6 +326,10 @@ void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f, { X86CPU *cpu = X86_CPU(cs); APICCommonState *s = APIC_COMMON(cpu->apic_state); + if (!s) { + cpu_fprintf(f, "local apic state not available\n"); + return; + } uint32_t *lvt = s->lvt; cpu_fprintf(f, "dumping local APIC state for CPU %-2u\n\n", diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c index ca2ea09f54..628f64aad5 100644 --- a/target/i386/misc_helper.c +++ b/target/i386/misc_helper.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" @@ -156,7 +157,9 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { + qemu_mutex_lock_iothread(); cpu_set_apic_tpr(x86_env_get_cpu(env)->apic_state, t0); + qemu_mutex_unlock_iothread(); } env->v_tpr = t0 & 0x0f; break; diff --git a/target/i386/translate.c b/target/i386/translate.c index 72c1b03a2a..1d1372fb43 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -4418,6 +4418,13 @@ static target_ulong disas_insn(CPUX86State *env, 
DisasContext *s, s->vex_l = 0; s->vex_v = 0; next_byte: + /* x86 has an upper limit of 15 bytes for an instruction. Since we + * do not want to decode and generate IR for an illegal + * instruction, the following check limits the instruction size to + * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */ + if (s->pc - pc_start > 14) { + goto illegal_op; + } b = cpu_ldub_code(env, s->pc); s->pc++; /* Collect prefixes. */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 9f1f132cef..64017acfad 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2245,14 +2245,8 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on) } } -static void kvmppc_host_cpu_initfn(Object *obj) -{ - assert(kvm_enabled()); -} - static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) { - DeviceClass *dc = DEVICE_CLASS(oc); PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); uint32_t vmx = kvmppc_get_vmx(); uint32_t dfp = kvmppc_get_dfp(); @@ -2279,9 +2273,6 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data) if (icache_size != -1) { pcc->l1_icache_size = icache_size; } - - /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */ - dc->cannot_destroy_with_object_finalize_yet = true; } bool kvmppc_has_cap_epr(void) @@ -2333,7 +2324,6 @@ static int kvm_ppc_register_host_cpu_type(void) { TypeInfo type_info = { .name = TYPE_HOST_POWERPC_CPU, - .instance_init = kvmppc_host_cpu_initfn, .class_init = kvmppc_host_cpu_class_init, }; PowerPCCPUClass *pvr_pcc; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 4ea3a2de80..ce461cc905 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -376,12 +376,12 @@ static void cpu_model_from_info(S390CPUModel *model, const CpuModelInfo *info, static void qdict_add_disabled_feat(const char *name, void *opaque) { - qdict_put((QDict *) opaque, name, qbool_from_bool(false)); + qdict_put(opaque, name, qbool_from_bool(false)); } static void qdict_add_enabled_feat(const char 
*name, void *opaque) { - qdict_put((QDict *) opaque, name, qbool_from_bool(true)); + qdict_put(opaque, name, qbool_from_bool(true)); } /* convert S390CPUDef into a static CpuModelInfo */ @@ -660,7 +660,6 @@ static void check_compatibility(const S390CPUModel *max_model, static S390CPUModel *get_max_cpu_model(Error **errp) { -#ifndef CONFIG_USER_ONLY static S390CPUModel max_model; static bool cached; @@ -680,7 +679,6 @@ static S390CPUModel *get_max_cpu_model(Error **errp) cached = true; return &max_model; } -#endif return NULL; } diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index ac47154b83..1a249d8359 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -47,16 +47,16 @@ #include "exec/memattrs.h" #include "hw/s390x/s390-virtio-ccw.h" -/* #define DEBUG_KVM */ - -#ifdef DEBUG_KVM -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) +#ifndef DEBUG_KVM +#define DEBUG_KVM 0 #endif +#define DPRINTF(fmt, ...) do { \ + if (DEBUG_KVM) { \ + fprintf(stderr, fmt, ## __VA_ARGS__); \ + } \ +} while (0); + #define kvm_vm_check_mem_attr(s, attr) \ kvm_vm_check_attr(s, KVM_S390_VM_MEM_CTRL, attr) diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index 93b0e61366..eca82441d0 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -288,7 +288,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) switch (num) { case 0x500: /* KVM hypercall */ + qemu_mutex_lock_iothread(); r = s390_virtio_hypercall(env); + qemu_mutex_unlock_iothread(); break; case 0x44: /* yield */ @@ -515,7 +517,8 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered register" as parameter (input). Status (output) is always R1. */ - switch (order_code) { + /* sigp contains the order code in bit positions 56-63, mask it here. 
*/ + switch (order_code & 0xff) { case SIGP_SET_ARCH: /* switch arch */ break; diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 09a19c6f35..75ea247bc4 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -130,14 +130,7 @@ enum { static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { -#if QEMU_GNUC_PREREQ(4, 1) __builtin___clear_cache((char *) start, (char *) stop); -#else - register uintptr_t _beg __asm("a1") = start; - register uintptr_t _end __asm("a2") = stop; - register uintptr_t _flg __asm("a3") = 0; - __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg)); -#endif } #endif diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index d1f4c0dead..3785d77f62 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -843,6 +843,29 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) static tcg_insn_unit *qemu_ld_trampoline[16]; static tcg_insn_unit *qemu_st_trampoline[16]; +static void emit_extend(TCGContext *s, TCGReg r, int op) +{ + /* Emit zero extend of 8, 16 or 32 bit data as + * required by the MO_* value op; do nothing for 64 bit. + */ + switch (op & MO_SIZE) { + case MO_8: + tcg_out_arithi(s, r, r, 0xff, ARITH_AND); + break; + case MO_16: + tcg_out_arithi(s, r, r, 16, SHIFT_SLL); + tcg_out_arithi(s, r, r, 16, SHIFT_SRL); + break; + case MO_32: + if (SPARC64) { + tcg_out_arith(s, r, r, 0, SHIFT_SRL); + } + break; + case MO_64: + break; + } +} + static void build_trampolines(TCGContext *s) { static void * const qemu_ld_helpers[16] = { @@ -910,6 +933,7 @@ static void build_trampolines(TCGContext *s) qemu_st_trampoline[i] = s->code_ptr; if (SPARC64) { + emit_extend(s, TCG_REG_O2, i); ra = TCG_REG_O4; } else { ra = TCG_REG_O1; @@ -925,6 +949,7 @@ static void build_trampolines(TCGContext *s) tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); ra += 2; } else { + emit_extend(s, ra, i); ra += 1; } /* Skip the oi argument. 
*/ @@ -1119,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, /* Skip the high-part; we'll perform the extract in the trampoline. */ param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ @@ -1199,7 +1224,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, /* Skip the high-part; we'll perform the extract in the trampoline. */ param++; } - tcg_out_mov(s, TCG_TYPE_REG, param++, addr); + tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); if (!SPARC64 && (memop & MO_SIZE) == MO_64) { /* Skip the high-part; we'll perform the extract in the trampoline. */ param++; diff --git a/tests/Makefile.include b/tests/Makefile.include index 402e71cf06..579ec07cce 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -379,6 +379,7 @@ qapi-schema += doc-duplicated-since.json qapi-schema += doc-empty-arg.json qapi-schema += doc-empty-section.json qapi-schema += doc-empty-symbol.json +qapi-schema += doc-good.json qapi-schema += doc-interleaved-section.json qapi-schema += doc-invalid-end.json qapi-schema += doc-invalid-end2.json @@ -518,7 +519,7 @@ QEMU_CFLAGS += -I$(SRC_PATH)/tests # Deps that are common to various different sets of tests below -test-util-obj-y = $(trace-obj-y) libqemuutil.a libqemustub.a +test-util-obj-y = libqemuutil.a libqemustub.a test-qom-obj-y = $(qom-obj-y) $(test-util-obj-y) test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \ tests/test-qapi-event.o tests/test-qmp-introspect.o \ @@ -607,6 +608,9 @@ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-int $(gen-out-type) -o tests -p "test-" $<, \ "GEN","$@") +tests/qapi-schema/doc-good.test.texi: $(SRC_PATH)/tests/qapi-schema/doc-good.json $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py) + $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > 
$@,"GEN","$@") + tests/test-string-output-visitor$(EXESUF): tests/test-string-output-visitor.o $(test-qapi-obj-y) tests/test-string-input-visitor$(EXESUF): tests/test-string-input-visitor.o $(test-qapi-obj-y) tests/test-qmp-event$(EXESUF): tests/test-qmp-event.o $(test-qapi-obj-y) @@ -736,7 +740,7 @@ tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \ $(chardev-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) -tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) +tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) $(test-qapi-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) @@ -856,9 +860,6 @@ QEMU_IOTESTS_HELPERS-$(CONFIG_LINUX) = tests/qemu-iotests/socket_scm_helper$(EXE check-tests/qemu-iotests-quick.sh: tests/qemu-iotests-quick.sh qemu-img$(EXESUF) qemu-io$(EXESUF) $(QEMU_IOTESTS_HELPERS-y) $< -.PHONY: check-tests/test-qapi.py -check-tests/test-qapi.py: tests/test-qapi.py - .PHONY: $(patsubst %, check-%, $(check-qapi-schema-y)) $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json $(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts \ @@ -871,10 +872,14 @@ $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json @perl -p -e 's|\Q$(SRC_PATH)\E/||g' $*.test.err | diff -q $(SRC_PATH)/$*.err - @diff -q $(SRC_PATH)/$*.exit $*.test.exit +.PHONY: check-tests/qapi-schema/doc-good.texi +check-tests/qapi-schema/doc-good.texi: tests/qapi-schema/doc-good.test.texi + @diff -q $(SRC_PATH)/tests/qapi-schema/doc-good.texi $< + # Consolidated targets .PHONY: check-qapi-schema check-qtest check-unit check check-clean -check-qapi-schema: $(patsubst %,check-%, 
$(check-qapi-schema-y)) +check-qapi-schema: $(patsubst %,check-%, $(check-qapi-schema-y)) check-tests/qapi-schema/doc-good.texi check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS)) check-unit: $(patsubst %,check-%, $(check-unit-y)) check-block: $(patsubst %,check-%, $(check-block-y)) diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h index 9f9a2d532c..348e4d7931 100644 --- a/tests/acpi-utils.h +++ b/tests/acpi-utils.h @@ -26,7 +26,7 @@ typedef struct { gsize asl_len; gchar *asl_file; bool tmp_files_retain; /* do not delete the temp asl/aml */ -} QEMU_PACKED AcpiSdtTable; +} AcpiSdtTable; #define ACPI_READ_FIELD(field, addr) \ do { \ diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index 57edf6af33..11f48b049c 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -79,8 +79,8 @@ static void test_machine(const void *data) g_assert(fd != -1); args = g_strdup_printf("-M %s,accel=tcg -chardev file,id=serial0,path=%s" - " -serial chardev:serial0 %s", test->machine, - tmpname, test->extra); + " -no-shutdown -serial chardev:serial0 %s", + test->machine, tmpname, test->extra); qtest_start(args); unlink(tmpname); diff --git a/tests/libqtest.c b/tests/libqtest.c index a5c3d2bf48..99b1195355 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -167,6 +167,14 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args) socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid()); qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid()); + /* It's possible that if an earlier test run crashed it might + * have left a stale unix socket lying around. 
Delete any + * stale old socket to avoid spurious test failures with + * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1) + */ + unlink(socket_path); + unlink(qmp_socket_path); + sock = init_socket(socket_path); qmpsock = init_socket(qmp_socket_path); diff --git a/tests/qapi-schema/doc-good.err b/tests/qapi-schema/doc-good.err new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/tests/qapi-schema/doc-good.err diff --git a/tests/qapi-schema/doc-good.exit b/tests/qapi-schema/doc-good.exit new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/tests/qapi-schema/doc-good.exit @@ -0,0 +1 @@ +0 diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json new file mode 100644 index 0000000000..cfdc0a8a81 --- /dev/null +++ b/tests/qapi-schema/doc-good.json @@ -0,0 +1,136 @@ +# -*- Mode: Python -*- +# Positive QAPI doc comment tests + +{ 'pragma': { 'doc-required': true } } + +## +# = Section +# +# == Subsection +# +# *strong* _with emphasis_ +# @var {in braces} +# * List item one +# - Two, multiple +# lines +# +# 3. Three +# Still in list +# +# Not in list +# - Second list +# Note: still in list +# +# Note: not in list +# 1. Third list +# is numbered +# +# - another item +# +# | example +# | multiple lines +# +# Returns: the King +# Since: the first age +# Notes: +# +# 1. Lorem ipsum dolor sit amet +# +# 2. 
Ut enim ad minim veniam +# +# Duis aute irure dolor +# +# Example: +# +# -> in +# <- out +# Examples: +# - *verbatim* +# - {braces} +## + +## +# @Enum: +# == Produces *invalid* texinfo +# @one: The _one_ {and only} +# +# @two is undocumented +## +{ 'enum': 'Enum', 'data': [ 'one', 'two' ] } + +## +# @Base: +# @base1: +# the first member +## +{ 'struct': 'Base', 'data': { 'base1': 'Enum' } } + +## +# @Variant1: +# A paragraph +# +# Another paragraph (but no @var: line) +## +{ 'struct': 'Variant1', 'data': { 'var1': 'str' } } + +## +# @Variant2: +## +{ 'struct': 'Variant2', 'data': {} } + +## +# @Object: +## +{ 'union': 'Object', + 'base': 'Base', + 'discriminator': 'base1', + 'data': { 'one': 'Variant1', 'two': 'Variant2' } } + +## +# @SugaredUnion: +## +{ 'union': 'SugaredUnion', + 'data': { 'one': 'Variant1', 'two': 'Variant2' } } + +## +# == Another subsection +## + +## +# @cmd: +# @arg1: the first argument +# +# @arg2: the second +# argument +# Note: @arg3 is undocumented +# Returns: @Object +# TODO: frobnicate +# Notes: +# - Lorem ipsum dolor sit amet +# - Ut enim ad minim veniam +# +# Duis aute irure dolor +# Example: +# +# -> in +# <- out +# Examples: +# - *verbatim* +# - {braces} +# Since: 2.10 +## +{ 'command': 'cmd', + 'data': { 'arg1': 'int', '*arg2': 'str', 'arg3': 'bool' }, + 'returns': 'Object' } + +## +# @cmd-boxed: +# If you're bored enough to read this, go see a video of boxed cats +# Example: +# +# -> in +# +# <- out +## +{ 'command': 'cmd-boxed', 'boxed': true, + 'data': 'Object' } diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out new file mode 100644 index 0000000000..70c1252408 --- /dev/null +++ b/tests/qapi-schema/doc-good.out @@ -0,0 +1,148 @@ +object Base + member base1: Enum optional=False +enum Enum ['one', 'two'] +object Object + base Base + tag base1 + case one: Variant1 + case two: Variant2 +enum QType ['none', 'qnull', 'qint', 'qstring', 'qdict', 'qlist', 'qfloat', 'qbool'] + prefix QTYPE +object SugaredUnion 
+ member type: SugaredUnionKind optional=False + tag type + case one: q_obj_Variant1-wrapper + case two: q_obj_Variant2-wrapper +enum SugaredUnionKind ['one', 'two'] +object Variant1 + member var1: str optional=False +object Variant2 +command cmd q_obj_cmd-arg -> Object + gen=True success_response=True boxed=False +command cmd-boxed Object -> None + gen=True success_response=True boxed=True +object q_empty +object q_obj_Variant1-wrapper + member data: Variant1 optional=False +object q_obj_Variant2-wrapper + member data: Variant2 optional=False +object q_obj_cmd-arg + member arg1: int optional=False + member arg2: str optional=True + member arg3: bool optional=False +doc freeform + body= += Section + +== Subsection + +*strong* _with emphasis_ +@var {in braces} +* List item one +- Two, multiple +lines + +3. Three +Still in list + +Not in list +- Second list +Note: still in list + +Note: not in list +1. Third list +is numbered + +- another item + +| example +| multiple lines + +Returns: the King +Since: the first age +Notes: + +1. Lorem ipsum dolor sit amet + +2. 
Ut enim ad minim veniam + +Duis aute irure dolor + +Example: + +-> in +<- out +Examples: +- *verbatim* +- {braces} +doc symbol=Enum + body= +== Produces *invalid* texinfo + arg=one +The _one_ {and only} + arg=two + + section= +@two is undocumented +doc symbol=Base + body= + + arg=base1 +the first member +doc symbol=Variant1 + body= +A paragraph + +Another paragraph (but no @var: line) + arg=var1 + +doc symbol=Variant2 + body= + +doc symbol=Object + body= + +doc symbol=SugaredUnion + body= + + arg=type + +doc freeform + body= +== Another subsection +doc symbol=cmd + body= + + arg=arg1 +the first argument + arg=arg2 +the second +argument + arg=arg3 + + section=Note +@arg3 is undocumented + section=Returns +@Object + section=TODO +frobnicate + section=Notes +- Lorem ipsum dolor sit amet +- Ut enim ad minim veniam + +Duis aute irure dolor + section=Example +-> in +<- out + section=Examples +- *verbatim* +- {braces} + section=Since +2.10 +doc symbol=cmd-boxed + body= +If you're bored enough to read this, go see a video of boxed cats + section=Example +-> in + +<- out diff --git a/tests/qapi-schema/doc-good.texi b/tests/qapi-schema/doc-good.texi new file mode 100644 index 0000000000..c410626e4a --- /dev/null +++ b/tests/qapi-schema/doc-good.texi @@ -0,0 +1,243 @@ +@section Section + +@subsection Subsection + +@strong{strong} @emph{with emphasis} +@code{var} @{in braces@} +@itemize @bullet +@item +List item one +@item +Two, multiple +lines + +@item +Three +Still in list + +@end itemize + +Not in list +@itemize @minus +@item +Second list +Note: still in list + +@end itemize + +Note: not in list +@enumerate +@item +Third list +is numbered + +@item +another item + +@example +example +@end example + +@example +multiple lines +@end example + + +@end enumerate + +Returns: the King +Since: the first age +Notes: + +@enumerate +@item +Lorem ipsum dolor sit amet + +@item +Ut enim ad minim veniam + +@end enumerate + +Duis aute irure dolor + +Example: + +-> in +<- out +Examples: 
+@itemize @minus +@item +@strong{verbatim} +@item +@{braces@} +@end itemize + + + +@deftp {Enum} Enum + +@subsection Produces @strong{invalid} texinfo + +@b{Values:} +@table @asis +@item @code{one} +The @emph{one} @{and only@} +@item @code{two} +Not documented +@end table +@code{two} is undocumented + +@end deftp + + + +@deftp {Object} Base + + + +@b{Members:} +@table @asis +@item @code{base1: Enum} +the first member +@end table + + +@end deftp + + + +@deftp {Object} Variant1 + +A paragraph + +Another paragraph (but no @code{var}: line) + +@b{Members:} +@table @asis +@item @code{var1: string} +Not documented +@end table + + +@end deftp + + + +@deftp {Object} Variant2 + + + + +@end deftp + + + +@deftp {Object} Object + + + +@b{Members:} +@table @asis +@item The members of @code{Base} +@item The members of @code{Variant1} when @code{base1} is @t{"one"} +@item The members of @code{Variant2} when @code{base1} is @t{"two"} +@end table + + +@end deftp + + + +@deftp {Object} SugaredUnion + + + +@b{Members:} +@table @asis +@item @code{type} +One of @t{"one"}, @t{"two"} +@item @code{data: Variant1} when @code{type} is @t{"one"} +@item @code{data: Variant2} when @code{type} is @t{"two"} +@end table + + +@end deftp + + +@subsection Another subsection + + +@deftypefn Command {} cmd + + + +@b{Arguments:} +@table @asis +@item @code{arg1: int} +the first argument +@item @code{arg2: string} (optional) +the second +argument +@item @code{arg3: boolean} +Not documented +@end table + + +@b{Note:} +@code{arg3} is undocumented + +@b{Returns:} +@code{Object} + +@b{TODO:} +frobnicate + +@b{Notes:} +@itemize @minus +@item +Lorem ipsum dolor sit amet +@item +Ut enim ad minim veniam + +@end itemize + +Duis aute irure dolor + +@b{Example:} +@example +-> in +<- out +@end example + + +@b{Examples:} +@example +- *verbatim* +- @{braces@} +@end example + + +@b{Since:} +2.10 + +@end deftypefn + + + +@deftypefn Command {} cmd-boxed + +If you're bored enough to read this, go see a video of boxed cats 
+ +@b{Arguments:} the members of @code{Object} + +@b{Example:} +@example +-> in + +<- out +@end example + + +@end deftypefn + + diff --git a/tests/qapi-schema/test-qapi.py b/tests/qapi-schema/test-qapi.py index ef74e2c4c8..c7724d3437 100644 --- a/tests/qapi-schema/test-qapi.py +++ b/tests/qapi-schema/test-qapi.py @@ -55,3 +55,14 @@ class QAPISchemaTestVisitor(QAPISchemaVisitor): schema = QAPISchema(sys.argv[1]) schema.visit(QAPISchemaTestVisitor()) + +for doc in schema.docs: + if doc.symbol: + print 'doc symbol=%s' % doc.symbol + else: + print 'doc freeform' + print ' body=\n%s' % doc.body + for arg, section in doc.args.iteritems(): + print ' arg=%s\n%s' % (arg, section) + for section in doc.sections: + print ' section=%s\n%s' % (section.name, section) diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index e3f9e75967..4f9302db42 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -41,6 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # Any format supporting backing files _supported_fmt qcow qcow2 vmdk qed _supported_proto generic +_unsupported_proto vxhs _supported_os Linux _unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 index 9c4a68c977..7a111100ec 100755 --- a/tests/qemu-iotests/020 +++ b/tests/qemu-iotests/020 @@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # Any format supporting backing files _supported_fmt qcow qcow2 vmdk qed _supported_proto generic +_unsupported_proto vxhs _supported_os Linux _unsupported_imgopts "subformat=monolithicFlat" \ "subformat=twoGbMaxExtentFlat" \ diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 index 7783e57c71..97a8869251 100755 --- a/tests/qemu-iotests/028 +++ b/tests/qemu-iotests/028 @@ -32,6 +32,7 @@ status=1 # failure is the default! 
_cleanup() { + _cleanup_qemu rm -f "${TEST_IMG}.copy" _cleanup_test_img } diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029 index e639ac0ddf..30bab24dc0 100755 --- a/tests/qemu-iotests/029 +++ b/tests/qemu-iotests/029 @@ -42,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # Any format supporting intenal snapshots _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux # Internal snapshots are (currently) impossible with refcount_bits=1 _unsupported_imgopts 'refcount_bits=1[^0-9]' diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index bc6cf782fe..2f54986434 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -966,5 +966,51 @@ class TestRepairQuorum(iotests.QMPTestCase): # to check that this file is really driven by quorum self.vm.shutdown() +# Test mirroring with a source that does not have any parents (not even a +# BlockBackend) +class TestOrphanedSource(iotests.QMPTestCase): + def setUp(self): + blk0 = { 'node-name': 'src', + 'driver': 'null-co' } + + blk1 = { 'node-name': 'dest', + 'driver': 'null-co' } + + blk2 = { 'node-name': 'dest-ro', + 'driver': 'null-co', + 'read-only': 'on' } + + self.vm = iotests.VM() + self.vm.add_blockdev(self.qmp_to_opts(blk0)) + self.vm.add_blockdev(self.qmp_to_opts(blk1)) + self.vm.add_blockdev(self.qmp_to_opts(blk2)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + + def test_no_job_id(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('blockdev-mirror', device='src', sync='full', + target='dest') + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_success(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('blockdev-mirror', job_id='job', device='src', + sync='full', target='dest') + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait('job') + + def test_failing_permissions(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('blockdev-mirror', 
device='src', sync='full', + target='dest-ro') + self.assert_qmp(result, 'error/class', 'GenericError') + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index b67d0504a6..e30fd3b05b 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -............................................................................ +............................................................................... ---------------------------------------------------------------------- -Ran 76 tests +Ran 79 tests OK diff --git a/tests/qemu-iotests/073 b/tests/qemu-iotests/073 index ad37a617b2..40f85b18b9 100755 --- a/tests/qemu-iotests/073 +++ b/tests/qemu-iotests/073 @@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux CLUSTER_SIZE=64k diff --git a/tests/qemu-iotests/094 b/tests/qemu-iotests/094 index 0ba0b0c361..9aa01e3627 100755 --- a/tests/qemu-iotests/094 +++ b/tests/qemu-iotests/094 @@ -27,7 +27,14 @@ echo "QA output created by $seq" here="$PWD" status=1 # failure is the default! -trap "exit \$status" 0 1 2 3 15 +_cleanup() +{ + _cleanup_qemu + _cleanup_test_img + rm -f "$TEST_DIR/source.$IMGFMT" +} + +trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . 
./common.rc @@ -73,8 +80,6 @@ _send_qemu_cmd $QEMU_HANDLE \ wait=1 _cleanup_qemu -_cleanup_test_img -rm -f "$TEST_DIR/source.$IMGFMT" # success, all done echo '*** done' diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097 index 4c33e8038a..e22670c8d0 100755 --- a/tests/qemu-iotests/097 +++ b/tests/qemu-iotests/097 @@ -56,30 +56,25 @@ _supported_os Linux # 3: Two-layer backing chain, commit to lower backing file # (in this case, the top image will implicitly stay unchanged) # -# Each pass is run twice, since qcow2 has different code paths for cleaning -# an image depending on whether it has a snapshot. -# # 020 already tests committing, so this only tests whether image chains are # working properly and that all images above the base are emptied; therefore, # no complicated patterns are necessary. Check near the 2G mark, as qcow2 # has been buggy at that boundary in the past. for i in 0 1 2 3; do -for j in 0 1; do echo -echo "=== Test pass $i.$j ===" +echo "=== Test pass $i ===" echo -TEST_IMG="$TEST_IMG.base" _make_test_img 2100M -TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 2100M -_make_test_img -b "$TEST_IMG.itmd" 2100M -if [ $j -eq 0 ]; then - $QEMU_IMG snapshot -c snap "$TEST_IMG" -fi +len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned +TEST_IMG="$TEST_IMG.base" _make_test_img $len +TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len +_make_test_img -b "$TEST_IMG.itmd" $len -$QEMU_IO -c 'write -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io -$QEMU_IO -c 'write -P 2 0x7ffe0000 128k' "$TEST_IMG.itmd" | _filter_qemu_io -$QEMU_IO -c 'write -P 3 0x7fff0000 64k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 1 0x7ffd0000 192k" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -P 2 0x7ffe0000 128k" "$TEST_IMG.itmd" | _filter_qemu_io +$QEMU_IO -c "write -P 3 0x7fff0000 64k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 4 $(($len - 512)) 512" "$TEST_IMG" | _filter_qemu_io if [ $i 
-lt 3 ]; then if [ $i == 0 ]; then @@ -97,11 +92,13 @@ if [ $i -lt 3 ]; then # Bottom should be unchanged $QEMU_IO -c 'read -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c "read -P 0 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io # Intermediate should contain changes from top $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.itmd" | _filter_qemu_io # And in pass 0, the top image should be empty, whereas in both other passes # it should be unchanged (which is both checked by qemu-img map) @@ -112,6 +109,7 @@ else $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.base" | _filter_qemu_io $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.base" | _filter_qemu_io $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io # Both top and intermediate should be unchanged fi @@ -121,7 +119,6 @@ $QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map $QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map done -done # success, all done diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out index 8106cc9275..f6705a1cc7 100644 --- a/tests/qemu-iotests/097.out +++ b/tests/qemu-iotests/097.out @@ -1,222 +1,131 @@ QA output created by 097 -=== Test pass 0.0 === +=== Test pass 0 === -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 
'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd wrote 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 131072/131072 bytes at offset 2147352576 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image committed. read 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147287040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147352576 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File 0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd -=== Test pass 0.1 === +=== Test pass 1 === -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd -wrote 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 131072/131072 bytes at offset 2147352576 -128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 
65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Image committed. -read 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147287040 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147352576 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd - -=== Test pass 1.0 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd -wrote 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 131072/131072 bytes at offset 2147352576 -128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Image committed. 
-read 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147287040 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147352576 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd -0x7fff0000 0x10000 TEST_DIR/t.IMGFMT - -=== Test pass 1.1 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd wrote 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 131072/131072 bytes at offset 2147352576 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image committed. 
read 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147287040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147352576 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File 0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd 0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT -=== Test pass 2.0 === +=== Test pass 2 === -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd wrote 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 131072/131072 bytes at offset 2147352576 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image committed. 
read 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147287040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147352576 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File 0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd 0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT -=== Test pass 2.1 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd -wrote 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 131072/131072 bytes at offset 2147352576 -128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Image committed. 
-read 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147287040 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147352576 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd -0x7fff0000 0x10000 TEST_DIR/t.IMGFMT - -=== Test pass 3.0 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd -wrote 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 131072/131072 bytes at offset 2147352576 -128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Image committed. 
-read 65536/65536 bytes at offset 2147287040 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147352576 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd -Offset Length File -0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base -0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd -0x7fff0000 0x10000 TEST_DIR/t.IMGFMT - -=== Test pass 3.1 === +=== Test pass 3 === -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd wrote 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 131072/131072 bytes at offset 2147352576 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image committed. 
read 65536/65536 bytes at offset 2147287040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -224,13 +133,18 @@ read 65536/65536 bytes at offset 2147352576 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File 0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base +0x83400000 0x200 TEST_DIR/t.IMGFMT.base Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.base Offset Length File 0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base 0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd 0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT *** done diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102 index 64b4af9441..87db1bb1bf 100755 --- a/tests/qemu-iotests/102 +++ b/tests/qemu-iotests/102 @@ -25,11 +25,12 @@ seq=$(basename $0) echo "QA output created by $seq" here=$PWD -status=1 # failure is the default! +status=1 # failure is the default! _cleanup() { - _cleanup_test_img + _cleanup_qemu + _cleanup_test_img } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 index 927151a285..6161633a52 100755 --- a/tests/qemu-iotests/109 +++ b/tests/qemu-iotests/109 @@ -29,6 +29,7 @@ status=1 # failure is the default! _cleanup() { + _cleanup_qemu rm -f $TEST_IMG.src _cleanup_test_img } diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out index e5d70d75f1..55fe536d56 100644 --- a/tests/qemu-iotests/109.out +++ b/tests/qemu-iotests/109.out @@ -10,7 +10,7 @@ Automatically detecting the format is dangerous for raw images, write operations Specify the 'raw' format explicitly to remove the restrictions. 
{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} {"return": []} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -73,7 +73,7 @@ Automatically detecting the format is dangerous for raw images, write operations Specify the 'raw' format explicitly to remove the restrictions. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} {"return": []} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -115,7 +115,7 @@ Automatically detecting the format is dangerous for raw images, write operations Specify the 'raw' format explicitly to remove the restrictions. 
{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} {"return": []} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -135,7 +135,7 @@ Automatically detecting the format is dangerous for raw images, write operations Specify the 'raw' format explicitly to remove the restrictions. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} {"return": []} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -195,7 +195,7 @@ Automatically detecting the format is dangerous for raw images, write operations Specify the 'raw' format explicitly to remove the restrictions. 
{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}} -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2048, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}} {"return": []} read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/114 b/tests/qemu-iotests/114 index f110d4f65a..5b7dc5496c 100755 --- a/tests/qemu-iotests/114 +++ b/tests/qemu-iotests/114 @@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux diff --git a/tests/qemu-iotests/117 b/tests/qemu-iotests/117 index e955d52de3..6c83461182 100755 --- a/tests/qemu-iotests/117 +++ b/tests/qemu-iotests/117 @@ -29,6 +29,7 @@ status=1 # failure is the default! _cleanup() { + _cleanup_qemu _cleanup_test_img } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130 index ecc8a5ba1b..e7e43de6d6 100755 --- a/tests/qemu-iotests/130 +++ b/tests/qemu-iotests/130 @@ -31,6 +31,7 @@ status=1 # failure is the default! 
_cleanup() { + _cleanup_qemu _cleanup_test_img } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -42,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux qemu_comm_method="monitor" diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134 index af618b8817..acce946e75 100755 --- a/tests/qemu-iotests/134 +++ b/tests/qemu-iotests/134 @@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139 index 6d98e4f879..175d8f0008 100644 --- a/tests/qemu-iotests/139 +++ b/tests/qemu-iotests/139 @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Test cases for the QMP 'x-blockdev-del' command +# Test cases for the QMP 'blockdev-del' command # # Copyright (C) 2015 Igalia, S.L. # Author: Alberto Garcia <berto@igalia.com> @@ -79,7 +79,7 @@ class TestBlockdevDel(iotests.QMPTestCase): # Delete a BlockDriverState def delBlockDriverState(self, node, expect_error = False): self.checkBlockDriverState(node) - result = self.vm.qmp('x-blockdev-del', node_name = node) + result = self.vm.qmp('blockdev-del', node_name = node) if expect_error: self.assert_qmp(result, 'error/class', 'GenericError') else: @@ -173,7 +173,7 @@ class TestBlockdevDel(iotests.QMPTestCase): self.wait_until_completed(id) # Add a BlkDebug node - # Note that the purpose of this is to test the x-blockdev-del + # Note that the purpose of this is to test the blockdev-del # sanity checks, not to create a usable blkdebug drive def addBlkDebug(self, debug, node): self.checkBlockDriverState(node, False) @@ -191,7 +191,7 @@ class TestBlockdevDel(iotests.QMPTestCase): self.checkBlockDriverState(debug) # Add a BlkVerify node - # Note that the purpose of this is to test the x-blockdev-del + # Note that the purpose of this is to test the blockdev-del # sanity checks, not to create a usable blkverify 
drive def addBlkVerify(self, blkverify, test, raw): self.checkBlockDriverState(test, False) diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140 index 49f9df4eb0..8c80a5a866 100755 --- a/tests/qemu-iotests/140 +++ b/tests/qemu-iotests/140 @@ -33,6 +33,7 @@ status=1 # failure is the default! _cleanup() { + _cleanup_qemu _cleanup_test_img rm -f "$TEST_DIR/nbd" } diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 6d8f0a1a84..40a3405968 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -29,6 +29,7 @@ status=1 # failure is the default! _cleanup() { + _cleanup_qemu _cleanup_test_img rm -f "$TEST_DIR/{b,m,o}.$IMGFMT" } @@ -65,7 +66,7 @@ test_blockjob() # We want this to return an error because the block job is still running _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'x-blockdev-del', + "{'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ 'error' | _filter_generated_node_ids @@ -75,7 +76,7 @@ test_blockjob() "$3" _send_qemu_cmd $QEMU_HANDLE \ - "{'execute': 'x-blockdev-del', + "{'execute': 'blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ 'return' } diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 index ec4ef2221a..5ff1944507 100755 --- a/tests/qemu-iotests/143 +++ b/tests/qemu-iotests/143 @@ -29,6 +29,7 @@ status=1 # failure is the default! 
_cleanup() { + _cleanup_qemu rm -f "$TEST_DIR/nbd" } trap "_cleanup; exit \$status" 0 1 2 3 15 diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147 index 45469c911e..32afea63d4 100755 --- a/tests/qemu-iotests/147 +++ b/tests/qemu-iotests/147 @@ -30,6 +30,13 @@ NBD_PORT = 10811 test_img = os.path.join(iotests.test_dir, 'test.img') unix_socket = os.path.join(iotests.test_dir, 'nbd.socket') + +def flatten_sock_addr(crumpled_address): + result = { 'type': crumpled_address['type'] } + result.update(crumpled_address['data']) + return result + + class NBDBlockdevAddBase(iotests.QMPTestCase): def blockdev_add_options(self, address, export=None): options = { 'node-name': 'nbd-blockdev', @@ -57,7 +64,7 @@ class NBDBlockdevAddBase(iotests.QMPTestCase): filename) break - result = self.vm.qmp('x-blockdev-del', node_name='nbd-blockdev') + result = self.vm.qmp('blockdev-del', node_name='nbd-blockdev') self.assert_qmp(result, 'return', {}) @@ -85,13 +92,15 @@ class QemuNBD(NBDBlockdevAddBase): 'host': 'localhost', 'port': str(NBD_PORT) } } - self.client_test('nbd://localhost:%i' % NBD_PORT, address) + self.client_test('nbd://localhost:%i' % NBD_PORT, + flatten_sock_addr(address)) def test_unix(self): self._server_up('-k', unix_socket) address = { 'type': 'unix', 'data': { 'path': unix_socket } } - self.client_test('nbd+unix://?socket=' + unix_socket, address) + self.client_test('nbd+unix://?socket=' + unix_socket, + flatten_sock_addr(address)) class BuiltinNBD(NBDBlockdevAddBase): @@ -134,7 +143,7 @@ class BuiltinNBD(NBDBlockdevAddBase): } } self._server_up(address) self.client_test('nbd://localhost:%i/nbd-export' % NBD_PORT, - address, 'nbd-export') + flatten_sock_addr(address), 'nbd-export') self._server_down() def test_inet6(self): @@ -149,10 +158,10 @@ class BuiltinNBD(NBDBlockdevAddBase): 'file': { 'driver': 'nbd', 'export': 'nbd-export', - 'server': address + 'server': flatten_sock_addr(address) } } self._server_up(address) - self.client_test(filename, address, 
'nbd-export') + self.client_test(filename, flatten_sock_addr(address), 'nbd-export') self._server_down() def test_unix(self): @@ -160,7 +169,7 @@ class BuiltinNBD(NBDBlockdevAddBase): 'data': { 'path': unix_socket } } self._server_up(address) self.client_test('nbd+unix:///nbd-export?socket=' + unix_socket, - address, 'nbd-export') + flatten_sock_addr(address), 'nbd-export') self._server_down() def test_fd(self): @@ -182,9 +191,9 @@ class BuiltinNBD(NBDBlockdevAddBase): 'file': { 'driver': 'nbd', 'export': 'nbd-export', - 'server': address + 'server': flatten_sock_addr(address) } } - self.client_test(filename, address, 'nbd-export') + self.client_test(filename, flatten_sock_addr(address), 'nbd-export') self._server_down() diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156 index cc95ff1f98..d799b73e1e 100755 --- a/tests/qemu-iotests/156 +++ b/tests/qemu-iotests/156 @@ -37,6 +37,7 @@ status=1 # failure is the default! _cleanup() { + _cleanup_qemu rm -f "$TEST_IMG{,.target}{,.backing,.overlay}" } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -48,6 +49,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 qed _supported_proto generic +_unsupported_proto vxhs _supported_os Linux # Create source disk diff --git a/tests/qemu-iotests/158 b/tests/qemu-iotests/158 index a6cdd6d8cf..ef8d70f109 100755 --- a/tests/qemu-iotests/158 +++ b/tests/qemu-iotests/158 @@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto generic +_unsupported_proto vxhs _supported_os Linux diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176 new file mode 100755 index 0000000000..950b28720e --- /dev/null +++ b/tests/qemu-iotests/176 @@ -0,0 +1,131 @@ +#!/bin/bash +# +# Commit changes into backing chains and empty the top image if the +# backing image is not explicitly specified. +# +# Variant of 097, which includes snapshots to test different codepath +# in qcow2 +# +# Copyright (C) 2014 Red Hat, Inc. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + _rm_test_img "$TEST_IMG.itmd" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +# Any format supporting backing files and bdrv_make_empty +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + + +# Four passes: +# 0: Two-layer backing chain, commit to upper backing file (implicitly) +# (in this case, the top image will be emptied) +# 1: Two-layer backing chain, commit to upper backing file (explicitly) +# (in this case, the top image will implicitly stay unchanged) +# 2: Two-layer backing chain, commit to upper backing file (implicitly with -d) +# (in this case, the top image will explicitly stay unchanged) +# 3: Two-layer backing chain, commit to lower backing file +# (in this case, the top image will implicitly stay unchanged) +# +# 020 already tests committing, so this only tests whether image chains are +# working properly and that all images above the base are emptied; therefore, +# no complicated patterns are necessary. Check near the 2G mark, as qcow2 +# has been buggy at that boundary in the past. 
+for i in 0 1 2 3; do + +echo +echo "=== Test pass $i ===" +echo + +len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned +TEST_IMG="$TEST_IMG.base" _make_test_img $len +TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len +_make_test_img -b "$TEST_IMG.itmd" $len +$QEMU_IMG snapshot -c snap "$TEST_IMG" + +$QEMU_IO -c "write -P 1 0x7ffd0000 192k" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -P 2 0x7ffe0000 128k" "$TEST_IMG.itmd" | _filter_qemu_io +$QEMU_IO -c "write -P 3 0x7fff0000 64k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 4 $(($len - 512)) 512" "$TEST_IMG" | _filter_qemu_io + +if [ $i -lt 3 ]; then + if [ $i == 0 ]; then + # -b "$TEST_IMG.itmd" should be the default (that is, committing to the + # first backing file in the chain) + $QEMU_IMG commit "$TEST_IMG" + elif [ $i == 1 ]; then + # explicitly specify the commit target (this should imply -d) + $QEMU_IMG commit -b "$TEST_IMG.itmd" "$TEST_IMG" + else + # do not explicitly specify the commit target, but use -d to leave the + # top image unchanged + $QEMU_IMG commit -d "$TEST_IMG" + fi + + # Bottom should be unchanged + $QEMU_IO -c 'read -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c "read -P 0 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io + + # Intermediate should contain changes from top + $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io + $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.itmd" | _filter_qemu_io + + # And in pass 0, the top image should be empty, whereas in both other passes + # it should be unchanged (which is both checked by qemu-img map) +else + $QEMU_IMG commit -b "$TEST_IMG.base" "$TEST_IMG" + + # Bottom should contain all changes + $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c 
'read -P 2 0x7ffe0000 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.base" | _filter_qemu_io + $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io + + # Both top and intermediate should be unchanged +fi + +$QEMU_IMG map "$TEST_IMG.base" | _filter_qemu_img_map +$QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map +$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map + +done + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out new file mode 100644 index 0000000000..6271fa7d6f --- /dev/null +++ b/tests/qemu-iotests/176.out @@ -0,0 +1,150 @@ +QA output created by 176 + +=== Test pass 0 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd +wrote 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 2147352576 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. 
+read 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147287040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147352576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd + +=== Test pass 1 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd +wrote 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 2147352576 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. 
+read 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147287040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147352576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT + +=== Test pass 2 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd +wrote 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 2147352576 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. 
+read 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147287040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147352576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.itmd +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT + +=== Test pass 3 === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd +wrote 196608/196608 bytes at offset 2147287040 +192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 131072/131072 bytes at offset 2147352576 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Image committed. 
+read 65536/65536 bytes at offset 2147287040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147352576 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 65536/65536 bytes at offset 2147418112 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x7ffd0000 0x30000 TEST_DIR/t.IMGFMT.base +0x83400000 0x200 TEST_DIR/t.IMGFMT.base +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x20000 TEST_DIR/t.IMGFMT.itmd +0x83400000 0x200 TEST_DIR/t.IMGFMT.base +Offset Length File +0x7ffd0000 0x10000 TEST_DIR/t.IMGFMT.base +0x7ffe0000 0x10000 TEST_DIR/t.IMGFMT.itmd +0x7fff0000 0x10000 TEST_DIR/t.IMGFMT +0x83400000 0x200 TEST_DIR/t.IMGFMT +*** done diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 4d5650d7c8..9c6f9721e5 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -157,6 +157,7 @@ check options -ssh test ssh -nfs test nfs -luks test luks + -vxhs test vxhs -xdiff graphical mode diff -nocache use O_DIRECT on backing file -misalign misalign memory allocations @@ -260,6 +261,11 @@ testlist options xpand=false ;; + -vxhs) + IMGPROTO=vxhs + xpand=false + ;; + -ssh) IMGPROTO=ssh xpand=false diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 55527aac87..c4b51b3509 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -105,6 +105,10 @@ if [ -z "$QEMU_NBD_PROG" ]; then export QEMU_NBD_PROG="`set_prog_path qemu-nbd`" fi +if [ -z "$QEMU_VXHS_PROG" ]; then + export QEMU_VXHS_PROG="`set_prog_path qnio_server`" +fi + _qemu_wrapper() { ( @@ -156,10 +160,19 @@ _qemu_nbd_wrapper() ) } +_qemu_vxhs_wrapper() +{ + ( + echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid" + exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@" + ) +} + export QEMU=_qemu_wrapper export QEMU_IMG=_qemu_img_wrapper 
export QEMU_IO=_qemu_io_wrapper export QEMU_NBD=_qemu_nbd_wrapper +export QEMU_VXHS=_qemu_vxhs_wrapper QEMU_IMG_EXTRA_ARGS= if [ "$IMGOPTSSYNTAX" = "true" ]; then diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 104001358b..c9a2d5c595 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -122,6 +122,7 @@ _filter_img_info() -e "s#$TEST_DIR#TEST_DIR#g" \ -e "s#$IMGFMT#IMGFMT#g" \ -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \ + -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \ -e "/encrypted: yes/d" \ -e "/cluster_size: [0-9]\\+/d" \ -e "/table_size: [0-9]\\+/d" \ diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 7d4781d4ad..62529eed6e 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -85,6 +85,9 @@ else elif [ "$IMGPROTO" = "nfs" ]; then TEST_DIR="nfs://127.0.0.1/$TEST_DIR" TEST_IMG=$TEST_DIR/t.$IMGFMT + elif [ "$IMGPROTO" = "vxhs" ]; then + TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT + TEST_IMG="vxhs://127.0.0.1:9999/t.$IMGFMT" else TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT fi @@ -171,6 +174,12 @@ _make_test_img() eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE >/dev/null &" sleep 1 # FIXME: qemu-nbd needs to be listening before we continue fi + + # Start QNIO server on image directory for vxhs protocol + if [ $IMGPROTO = "vxhs" ]; then + eval "$QEMU_VXHS -d $TEST_DIR > /dev/null &" + sleep 1 # Wait for server to come up. 
+ fi } _rm_test_img() @@ -197,6 +206,16 @@ _cleanup_test_img() fi rm -f "$TEST_IMG_FILE" ;; + vxhs) + if [ -f "${TEST_DIR}/qemu-vxhs.pid" ]; then + local QEMU_VXHS_PID + read QEMU_VXHS_PID < "${TEST_DIR}/qemu-vxhs.pid" + kill ${QEMU_VXHS_PID} >/dev/null 2>&1 + rm -f "${TEST_DIR}/qemu-vxhs.pid" + fi + rm -f "$TEST_IMG_FILE" + ;; + file) _rm_test_img "$TEST_DIR/t.$IMGFMT" _rm_test_img "$TEST_DIR/t.$IMGFMT.orig" diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 1f4bf03185..43142ddfcf 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -168,3 +168,4 @@ 173 rw auto 174 auto 175 auto quick +176 rw auto backing diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index bec8eb4b8d..abcf3c10e2 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -177,6 +177,14 @@ class VM(qtest.QEMUQtestMachine): self._num_drives += 1 return self + def add_blockdev(self, opts): + self._args.append('-blockdev') + if isinstance(opts, str): + self._args.append(opts) + else: + self._args.append(','.join(opts)) + return self + def pause_drive(self, drive, event=None): '''Pause drive r/w operations''' if not event: @@ -235,6 +243,13 @@ class QMPTestCase(unittest.TestCase): output[basestr[:-1]] = obj # Strip trailing '.' 
return output + def qmp_to_opts(self, obj): + obj = self.flatten_qmp_object(obj) + output_list = list() + for key in obj: + output_list += [key + '=' + obj[key]] + return ','.join(output_list) + def assert_qmp_absent(self, d, path): try: result = self.dictpath(d, path) diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index 4ccbda14af..0f80194e85 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -110,7 +110,6 @@ static BlockJob *test_block_job_start(unsigned int iterations, s->result = result; data->job = s; data->result = result; - block_job_start(&s->common); return &s->common; } @@ -123,6 +122,7 @@ static void test_single_job(int expected) txn = block_job_txn_new(); job = test_block_job_start(1, true, expected, &result); block_job_txn_add_job(txn, job); + block_job_start(job); if (expected == -ECANCELED) { block_job_cancel(job); @@ -164,6 +164,8 @@ static void test_pair_jobs(int expected1, int expected2) block_job_txn_add_job(txn, job1); job2 = test_block_job_start(2, true, expected2, &result2); block_job_txn_add_job(txn, job2); + block_job_start(job1); + block_job_start(job2); if (expected1 == -ECANCELED) { block_job_cancel(job1); @@ -223,6 +225,8 @@ static void test_pair_jobs_fail_cancel_race(void) block_job_txn_add_job(txn, job1); job2 = test_block_job_start(2, false, 0, &result2); block_job_txn_add_job(txn, job2); + block_job_start(job1); + block_job_start(job2); block_job_cancel(job1); diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c index 1957a86743..85e6603d59 100644 --- a/tests/test-crypto-block.c +++ b/tests/test-crypto-block.c @@ -187,11 +187,11 @@ static struct QCryptoBlockTestData { static ssize_t test_block_read_func(QCryptoBlock *block, + void *opaque, size_t offset, uint8_t *buf, size_t buflen, - Error **errp, - void *opaque) + Error **errp) { Buffer *header = opaque; @@ -204,9 +204,9 @@ static ssize_t test_block_read_func(QCryptoBlock *block, static ssize_t 
test_block_init_func(QCryptoBlock *block, + void *opaque, size_t headerlen, - Error **errp, - void *opaque) + Error **errp) { Buffer *header = opaque; @@ -219,11 +219,11 @@ static ssize_t test_block_init_func(QCryptoBlock *block, static ssize_t test_block_write_func(QCryptoBlock *block, + void *opaque, size_t offset, const uint8_t *buf, size_t buflen, - Error **errp, - void *opaque) + Error **errp) { Buffer *header = opaque; diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c index aaa9116fb7..c5c131479c 100644 --- a/tests/test-io-channel-socket.c +++ b/tests/test-io-channel-socket.c @@ -234,6 +234,8 @@ static void test_io_channel(bool async, qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); g_assert(!passFD || qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS)); + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); test = qio_channel_test_new(); qio_channel_test_run_threads(test, true, src, dst); @@ -248,6 +250,8 @@ static void test_io_channel(bool async, qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); g_assert(!passFD || qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS)); + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); test = qio_channel_test_new(); qio_channel_test_run_threads(test, false, src, dst); @@ -262,6 +266,8 @@ static void test_io_channel(bool async, qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS)); g_assert(!passFD || qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS)); + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); test = qio_channel_test_new(); qio_channel_test_run_threads(test, true, src, dst); @@ -276,6 +282,8 @@ static void test_io_channel(bool async, qio_channel_has_feature(src, 
QIO_CHANNEL_FEATURE_FD_PASS)); g_assert(!passFD || qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS)); + g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN)); + g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN)); test = qio_channel_test_new(); qio_channel_test_run_threads(test, false, src, dst); diff --git a/tests/test-keyval.c b/tests/test-keyval.c index 71288b082c..ba19560a22 100644 --- a/tests/test-keyval.c +++ b/tests/test-keyval.c @@ -14,6 +14,7 @@ #include "qapi/error.h" #include "qapi/qmp/qstring.h" #include "qapi/qobject-input-visitor.h" +#include "test-qapi-visit.h" #include "qemu/cutils.h" #include "qemu/option.h" @@ -218,14 +219,14 @@ static void test_keyval_parse_list(void) QDECREF(qdict); /* Multiple indexes, last one wins */ - qdict = keyval_parse("list.1=goner,list.0=null,list.1=eins,list.2=zwei", + qdict = keyval_parse("list.1=goner,list.0=null,list.01=eins,list.2=zwei", NULL, &error_abort); g_assert_cmpint(qdict_size(qdict), ==, 1); check_list012(qdict_get_qlist(qdict, "list")); QDECREF(qdict); /* List at deeper nesting */ - qdict = keyval_parse("a.list.1=eins,a.list.0=null,a.list.2=zwei", + qdict = keyval_parse("a.list.1=eins,a.list.00=null,a.list.2=zwei", NULL, &error_abort); g_assert_cmpint(qdict_size(qdict), ==, 1); sub_qdict = qdict_get_qdict(qdict, "a"); @@ -242,7 +243,7 @@ static void test_keyval_parse_list(void) g_assert(!qdict); /* Missing list indexes */ - qdict = keyval_parse("list.2=lonely", NULL, &err); + qdict = keyval_parse("list.1=lonely", NULL, &err); error_free_or_abort(&err); g_assert(!qdict); qdict = keyval_parse("list.0=null,list.2=eins,list.02=zwei", NULL, &err); @@ -608,6 +609,56 @@ static void test_keyval_visit_optional(void) visit_free(v); } +static void test_keyval_visit_alternate(void) +{ + Error *err = NULL; + Visitor *v; + QDict *qdict; + AltNumStr *ans; + AltNumInt *ani; + + /* + * Can't do scalar alternate variants other than string. 
You get + * the string variant if there is one, else an error. + */ + qdict = keyval_parse("a=1,b=2", NULL, &error_abort); + v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); + QDECREF(qdict); + visit_start_struct(v, NULL, NULL, 0, &error_abort); + visit_type_AltNumStr(v, "a", &ans, &error_abort); + g_assert_cmpint(ans->type, ==, QTYPE_QSTRING); + g_assert_cmpstr(ans->u.s, ==, "1"); + visit_type_AltNumInt(v, "a", &ani, &err); + error_free_or_abort(&err); + visit_end_struct(v, NULL); + visit_free(v); +} + +static void test_keyval_visit_any(void) +{ + Visitor *v; + QDict *qdict; + QObject *any; + QList *qlist; + QString *qstr; + + qdict = keyval_parse("a.0=null,a.1=1", NULL, &error_abort); + v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); + QDECREF(qdict); + visit_start_struct(v, NULL, NULL, 0, &error_abort); + visit_type_any(v, "a", &any, &error_abort); + qlist = qobject_to_qlist(any); + g_assert(qlist); + qstr = qobject_to_qstring(qlist_pop(qlist)); + g_assert_cmpstr(qstring_get_str(qstr), ==, "null"); + qstr = qobject_to_qstring(qlist_pop(qlist)); + g_assert_cmpstr(qstring_get_str(qstr), ==, "1"); + g_assert(qlist_empty(qlist)); + visit_check_struct(v, &error_abort); + visit_end_struct(v, NULL); + visit_free(v); +} + int main(int argc, char *argv[]) { g_test_init(&argc, &argv, NULL); @@ -619,6 +670,8 @@ int main(int argc, char *argv[]) g_test_add_func("/keyval/visit/dict", test_keyval_visit_dict); g_test_add_func("/keyval/visit/list", test_keyval_visit_list); g_test_add_func("/keyval/visit/optional", test_keyval_visit_optional); + g_test_add_func("/keyval/visit/alternate", test_keyval_visit_alternate); + g_test_add_func("/keyval/visit/any", test_keyval_visit_any); g_test_run(); return 0; } diff --git a/tests/test-opts-visitor.c b/tests/test-opts-visitor.c index 2238f8efe5..23e897061c 100644 --- a/tests/test-opts-visitor.c +++ b/tests/test-opts-visitor.c @@ -175,6 +175,7 @@ expect_u64_max(OptsVisitorFixture *f, gconstpointer test_data) static void 
test_opts_range_unvisited(void) { + Error *err = NULL; intList *list = NULL; intList *tail; QemuOpts *opts; @@ -199,10 +200,11 @@ test_opts_range_unvisited(void) g_assert_cmpint(tail->value, ==, 1); tail = (intList *)visit_next_list(v, (GenericList *)tail, sizeof(*list)); g_assert(tail); - visit_check_list(v, &error_abort); /* BUG: unvisited tail not reported */ + visit_check_list(v, &error_abort); /* unvisited tail ignored until... */ visit_end_list(v, (void **)&list); - visit_check_struct(v, &error_abort); + visit_check_struct(v, &err); /* ...here */ + error_free_or_abort(&err); visit_end_struct(v, NULL); qapi_free_intList(list); @@ -247,6 +249,25 @@ test_opts_range_beyond(void) qemu_opts_del(opts); } +static void +test_opts_dict_unvisited(void) +{ + Error *err = NULL; + QemuOpts *opts; + Visitor *v; + UserDefOptions *userdef; + + opts = qemu_opts_parse(qemu_find_opts("userdef"), "i64x=0,bogus=1", false, + &error_abort); + + v = opts_visitor_new(opts); + visit_type_UserDefOptions(v, NULL, &userdef, &err); + error_free_or_abort(&err); + visit_free(v); + qemu_opts_del(opts); + g_assert(!userdef); +} + int main(int argc, char **argv) { @@ -343,6 +364,8 @@ main(int argc, char **argv) g_test_add_func("/visitor/opts/range/beyond", test_opts_range_beyond); + g_test_add_func("/visitor/opts/dict/unvisited", test_opts_dict_unvisited); + g_test_run(); return 0; } diff --git a/tests/test-qobject-input-visitor.c b/tests/test-qobject-input-visitor.c index 6eb48fee7b..f965743b6e 100644 --- a/tests/test-qobject-input-visitor.c +++ b/tests/test-qobject-input-visitor.c @@ -116,6 +116,34 @@ static void test_visitor_in_int(TestInputVisitorData *data, g_assert_cmpint(res, ==, value); } +static void test_visitor_in_uint(TestInputVisitorData *data, + const void *unused) +{ + Error *err = NULL; + uint64_t res = 0; + int value = 42; + Visitor *v; + + v = visitor_input_test_init(data, "%d", value); + + visit_type_uint64(v, NULL, &res, &error_abort); + g_assert_cmpuint(res, ==, 
(uint64_t)value); + + /* BUG: value between INT64_MIN and -1 accepted modulo 2^64 */ + + v = visitor_input_test_init(data, "%d", -value); + + visit_type_uint64(v, NULL, &res, &error_abort); + g_assert_cmpuint(res, ==, (uint64_t)-value); + + /* BUG: value between INT64_MAX+1 and UINT64_MAX rejected */ + + v = visitor_input_test_init(data, "18446744073709551574"); + + visit_type_uint64(v, NULL, &res, &err); + error_free_or_abort(&err); +} + static void test_visitor_in_int_overflow(TestInputVisitorData *data, const void *unused) { @@ -1225,6 +1253,8 @@ int main(int argc, char **argv) input_visitor_test_add("/visitor/input/int", NULL, test_visitor_in_int); + input_visitor_test_add("/visitor/input/uint", + NULL, test_visitor_in_uint); input_visitor_test_add("/visitor/input/int_overflow", NULL, test_visitor_in_int_overflow); input_visitor_test_add("/visitor/input/int_keyval", diff --git a/tests/test-replication.c b/tests/test-replication.c index fac2da3f58..3016c6f2e0 100644 --- a/tests/test-replication.c +++ b/tests/test-replication.c @@ -144,18 +144,18 @@ static void prepare_imgs(void) /* Primary */ bdrv_img_create(p_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, - BDRV_O_RDWR, &local_err, true); + BDRV_O_RDWR, true, &local_err); g_assert(!local_err); /* Secondary */ bdrv_img_create(s_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, - BDRV_O_RDWR, &local_err, true); + BDRV_O_RDWR, true, &local_err); g_assert(!local_err); bdrv_img_create(s_active_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, - BDRV_O_RDWR, &local_err, true); + BDRV_O_RDWR, true, &local_err); g_assert(!local_err); bdrv_img_create(s_hidden_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, - BDRV_O_RDWR, &local_err, true); + BDRV_O_RDWR, true, &local_err); g_assert(!local_err); } diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c index 6db850bc89..79313a7f7a 100644 --- a/tests/test-string-input-visitor.c +++ b/tests/test-string-input-visitor.c @@ -63,6 +63,11 @@ static void 
test_visitor_in_int(TestInputVisitorData *data, visit_type_int(v, NULL, &res, &err); error_free_or_abort(&err); + + v = visitor_input_test_init(data, ""); + + visit_type_int(v, NULL, &res, &err); + error_free_or_abort(&err); } static void check_ilist(Visitor *v, int64_t *expected, size_t n) @@ -140,11 +145,11 @@ static void test_visitor_in_intList(TestInputVisitorData *data, v = visitor_input_test_init(data, "18446744073709551615"); check_ulist(v, expect4, ARRAY_SIZE(expect4)); - /* Empty list is invalid (weird) */ + /* Empty list */ v = visitor_input_test_init(data, ""); - visit_type_int64List(v, NULL, &res, &err); - error_free_or_abort(&err); + visit_type_int64List(v, NULL, &res, &error_abort); + g_assert(!res); /* Not a list */ diff --git a/tests/test-throttle.c b/tests/test-throttle.c index bd7c501b2e..a9201b1fea 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -205,8 +205,8 @@ static void test_config_functions(void) orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69; orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23; - orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ - orig_cfg.buckets[THROTTLE_BPS_READ].max = 1; /* should not be corrected */ + orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; + orig_cfg.buckets[THROTTLE_BPS_READ].max = 56; orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120; orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150; @@ -246,8 +246,8 @@ static void test_config_functions(void) g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69); g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23); - g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */ - g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 1); /* not fixed */ + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 0); + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 56); g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120); g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150); diff --git a/tests/virtio-9p-test.c 
b/tests/virtio-9p-test.c index 43a1ad813f..ad33d96387 100644 --- a/tests/virtio-9p-test.c +++ b/tests/virtio-9p-test.c @@ -256,8 +256,8 @@ static void v9fs_req_recv(P9Req *req, uint8_t id) qvirtio_wait_queue_isr(v9p->dev, v9p->vq, 1000 * 1000); v9fs_memread(req, &hdr, 7); - le32_to_cpus(&hdr.size); - le16_to_cpus(&hdr.tag); + hdr.size = ldl_le_p(&hdr.size); + hdr.tag = lduw_le_p(&hdr.tag); if (hdr.size >= 7) { break; } diff --git a/trace/Makefile.objs b/trace/Makefile.objs index 1b8eb4a616..afd571c3ec 100644 --- a/trace/Makefile.objs +++ b/trace/Makefile.objs @@ -9,27 +9,27 @@ $(BUILD_DIR)/trace-events-all: $(trace-events-files) $(obj)/generated-helpers-wrappers.h: $(obj)/generated-helpers-wrappers.h-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-helpers-wrappers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y) +$(obj)/generated-helpers-wrappers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y) $(call quiet-command,$(TRACETOOL) \ - --group=all \ + --group=root \ --format=tcg-helper-wrapper-h \ --backend=$(TRACE_BACKENDS) \ $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-helpers.h: $(obj)/generated-helpers.h-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-helpers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y) +$(obj)/generated-helpers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y) $(call quiet-command,$(TRACETOOL) \ - --group=all \ + --group=root \ --format=tcg-helper-h \ --backend=$(TRACE_BACKENDS) \ $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-helpers.c: $(obj)/generated-helpers.c-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-helpers.c-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y) +$(obj)/generated-helpers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y) $(call 
quiet-command,$(TRACETOOL) \ - --group=all \ + --group=root \ --format=tcg-helper-c \ --backend=$(TRACE_BACKENDS) \ $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") @@ -41,9 +41,9 @@ target-obj-y += generated-helpers.o $(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-tcg-tracers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y) +$(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y) $(call quiet-command,$(TRACETOOL) \ - --group=all \ + --group=root \ --format=tcg-h \ --backend=$(TRACE_BACKENDS) \ $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") diff --git a/ui/cocoa.m b/ui/cocoa.m index c81f7b6183..207555edf7 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -45,7 +45,36 @@ #ifndef MAC_OS_X_VERSION_10_10 #define MAC_OS_X_VERSION_10_10 101000 #endif +#ifndef MAC_OS_X_VERSION_10_12 +#define MAC_OS_X_VERSION_10_12 101200 +#endif +/* macOS 10.12 deprecated many constants, #define the new names for older SDKs */ +#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_12 +#define NSEventMaskAny NSAnyEventMask +#define NSEventModifierFlagCommand NSCommandKeyMask +#define NSEventModifierFlagControl NSControlKeyMask +#define NSEventModifierFlagOption NSAlternateKeyMask +#define NSEventTypeFlagsChanged NSFlagsChanged +#define NSEventTypeKeyUp NSKeyUp +#define NSEventTypeKeyDown NSKeyDown +#define NSEventTypeMouseMoved NSMouseMoved +#define NSEventTypeLeftMouseDown NSLeftMouseDown +#define NSEventTypeRightMouseDown NSRightMouseDown +#define NSEventTypeOtherMouseDown NSOtherMouseDown +#define NSEventTypeLeftMouseDragged NSLeftMouseDragged +#define NSEventTypeRightMouseDragged NSRightMouseDragged +#define NSEventTypeOtherMouseDragged NSOtherMouseDragged +#define NSEventTypeLeftMouseUp NSLeftMouseUp +#define NSEventTypeRightMouseUp NSRightMouseUp +#define NSEventTypeOtherMouseUp NSOtherMouseUp +#define NSEventTypeScrollWheel 
NSScrollWheel +#define NSTextAlignmentCenter NSCenterTextAlignment +#define NSWindowStyleMaskBorderless NSBorderlessWindowMask +#define NSWindowStyleMaskClosable NSClosableWindowMask +#define NSWindowStyleMaskMiniaturizable NSMiniaturizableWindowMask +#define NSWindowStyleMaskTitled NSTitledWindowMask +#endif //#define DEBUG @@ -494,7 +523,7 @@ QemuCocoaView *cocoaView; } else { [NSMenu setMenuBarVisible:NO]; fullScreenWindow = [[NSWindow alloc] initWithContentRect:[[NSScreen mainScreen] frame] - styleMask:NSBorderlessWindowMask + styleMask:NSWindowStyleMaskBorderless backing:NSBackingStoreBuffered defer:NO]; [fullScreenWindow setAcceptsMouseMovedEvents: YES]; @@ -517,7 +546,7 @@ QemuCocoaView *cocoaView; NSPoint p = [event locationInWindow]; switch ([event type]) { - case NSFlagsChanged: + case NSEventTypeFlagsChanged: keycode = cocoa_keycode_to_qemu([event keyCode]); if ((keycode == Q_KEY_CODE_META_L || keycode == Q_KEY_CODE_META_R) @@ -544,15 +573,15 @@ QemuCocoaView *cocoaView; } // release Mouse grab when pressing ctrl+alt - if (([event modifierFlags] & NSControlKeyMask) && ([event modifierFlags] & NSAlternateKeyMask)) { + if (([event modifierFlags] & NSEventModifierFlagControl) && ([event modifierFlags] & NSEventModifierFlagOption)) { [self ungrabMouse]; } break; - case NSKeyDown: + case NSEventTypeKeyDown: keycode = cocoa_keycode_to_qemu([event keyCode]); // forward command key combos to the host UI unless the mouse is grabbed - if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) { + if (!isMouseGrabbed && ([event modifierFlags] & NSEventModifierFlagCommand)) { [NSApp sendEvent:event]; return; } @@ -560,7 +589,7 @@ QemuCocoaView *cocoaView; // default // handle control + alt Key Combos (ctrl+alt is reserved for QEMU) - if (([event modifierFlags] & NSControlKeyMask) && ([event modifierFlags] & NSAlternateKeyMask)) { + if (([event modifierFlags] & NSEventModifierFlagControl) && ([event modifierFlags] & NSEventModifierFlagOption)) { switch 
(keycode) { // enable graphic console @@ -609,12 +638,12 @@ QemuCocoaView *cocoaView; kbd_put_keysym(keysym); } break; - case NSKeyUp: + case NSEventTypeKeyUp: keycode = cocoa_keycode_to_qemu([event keyCode]); // don't pass the guest a spurious key-up if we treated this // command-key combo as a host UI action - if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) { + if (!isMouseGrabbed && ([event modifierFlags] & NSEventModifierFlagCommand)) { return; } @@ -622,7 +651,7 @@ QemuCocoaView *cocoaView; qemu_input_event_send_key_qcode(dcl->con, keycode, false); } break; - case NSMouseMoved: + case NSEventTypeMouseMoved: if (isAbsoluteEnabled) { if (![self screenContainsPoint:p] || ![[self window] isKeyWindow]) { if (isMouseGrabbed) { @@ -636,39 +665,39 @@ QemuCocoaView *cocoaView; } mouse_event = true; break; - case NSLeftMouseDown: - if ([event modifierFlags] & NSCommandKeyMask) { + case NSEventTypeLeftMouseDown: + if ([event modifierFlags] & NSEventModifierFlagCommand) { buttons |= MOUSE_EVENT_RBUTTON; } else { buttons |= MOUSE_EVENT_LBUTTON; } mouse_event = true; break; - case NSRightMouseDown: + case NSEventTypeRightMouseDown: buttons |= MOUSE_EVENT_RBUTTON; mouse_event = true; break; - case NSOtherMouseDown: + case NSEventTypeOtherMouseDown: buttons |= MOUSE_EVENT_MBUTTON; mouse_event = true; break; - case NSLeftMouseDragged: - if ([event modifierFlags] & NSCommandKeyMask) { + case NSEventTypeLeftMouseDragged: + if ([event modifierFlags] & NSEventModifierFlagCommand) { buttons |= MOUSE_EVENT_RBUTTON; } else { buttons |= MOUSE_EVENT_LBUTTON; } mouse_event = true; break; - case NSRightMouseDragged: + case NSEventTypeRightMouseDragged: buttons |= MOUSE_EVENT_RBUTTON; mouse_event = true; break; - case NSOtherMouseDragged: + case NSEventTypeOtherMouseDragged: buttons |= MOUSE_EVENT_MBUTTON; mouse_event = true; break; - case NSLeftMouseUp: + case NSEventTypeLeftMouseUp: mouse_event = true; if (!isMouseGrabbed && [self screenContainsPoint:p]) { if([[self 
window] isKeyWindow]) { @@ -676,13 +705,13 @@ QemuCocoaView *cocoaView; } } break; - case NSRightMouseUp: + case NSEventTypeRightMouseUp: mouse_event = true; break; - case NSOtherMouseUp: + case NSEventTypeOtherMouseUp: mouse_event = true; break; - case NSScrollWheel: + case NSEventTypeScrollWheel: if (isMouseGrabbed) { buttons |= ([event deltaY] < 0) ? MOUSE_EVENT_WHEELUP : MOUSE_EVENT_WHEELDN; @@ -847,7 +876,7 @@ QemuCocoaView *cocoaView; // create a window normalWindow = [[NSWindow alloc] initWithContentRect:[cocoaView frame] - styleMask:NSTitledWindowMask|NSMiniaturizableWindowMask|NSClosableWindowMask + styleMask:NSWindowStyleMaskTitled|NSWindowStyleMaskMiniaturizable|NSWindowStyleMaskClosable backing:NSBackingStoreBuffered defer:NO]; if(!normalWindow) { fprintf(stderr, "(cocoa) can't create window\n"); @@ -1152,8 +1181,8 @@ QemuCocoaView *cocoaView; int x = 0, y = 0, about_width = 400, about_height = 200; NSRect window_rect = NSMakeRect(x, y, about_width, about_height); about_window = [[NSWindow alloc] initWithContentRect:window_rect - styleMask:NSTitledWindowMask | NSClosableWindowMask | - NSMiniaturizableWindowMask + styleMask:NSWindowStyleMaskTitled | NSWindowStyleMaskClosable | + NSWindowStyleMaskMiniaturizable backing:NSBackingStoreBuffered defer:NO]; [about_window setTitle: @"About"]; @@ -1192,7 +1221,7 @@ QemuCocoaView *cocoaView; [name_label setEditable: NO]; [name_label setBezeled: NO]; [name_label setDrawsBackground: NO]; - [name_label setAlignment: NSCenterTextAlignment]; + [name_label setAlignment: NSTextAlignmentCenter]; NSString *qemu_name = [[NSString alloc] initWithCString: gArgv[0] encoding: NSASCIIStringEncoding]; qemu_name = [qemu_name lastPathComponent]; @@ -1208,7 +1237,7 @@ QemuCocoaView *cocoaView; version_rect]; [version_label setEditable: NO]; [version_label setBezeled: NO]; - [version_label setAlignment: NSCenterTextAlignment]; + [version_label setAlignment: NSTextAlignmentCenter]; [version_label setDrawsBackground: NO]; /* Create 
the version string*/ @@ -1228,7 +1257,7 @@ QemuCocoaView *cocoaView; [copyright_label setEditable: NO]; [copyright_label setBezeled: NO]; [copyright_label setDrawsBackground: NO]; - [copyright_label setAlignment: NSCenterTextAlignment]; + [copyright_label setAlignment: NSTextAlignmentCenter]; [copyright_label setStringValue: [NSString stringWithFormat: @"%s", QEMU_COPYRIGHT]]; [superView addSubview: copyright_label]; @@ -1285,7 +1314,7 @@ int main (int argc, const char * argv[]) { [menu addItem:[NSMenuItem separatorItem]]; //Separator [menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) keyEquivalent:@"h"]; //Hide QEMU menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others - [menuItem setKeyEquivalentModifierMask:(NSAlternateKeyMask|NSCommandKeyMask)]; + [menuItem setKeyEquivalentModifierMask:(NSEventModifierFlagOption|NSEventModifierFlagCommand)]; [menu addItemWithTitle:@"Show All" action:@selector(unhideAllApplications:) keyEquivalent:@""]; // Show All [menu addItem:[NSMenuItem separatorItem]]; //Separator [menu addItemWithTitle:@"Quit QEMU" action:@selector(terminate:) keyEquivalent:@"q"]; @@ -1399,7 +1428,7 @@ static void cocoa_refresh(DisplayChangeListener *dcl) NSEvent *event; distantPast = [NSDate distantPast]; do { - event = [NSApp nextEventMatchingMask:NSAnyEventMask untilDate:distantPast + event = [NSApp nextEventMatchingMask:NSEventMaskAny untilDate:distantPast inMode: NSDefaultRunLoopMode dequeue:YES]; if (event != nil) { [cocoaView handleEvent:event]; diff --git a/ui/console.c b/ui/console.c index 937c950840..189eecfd29 100644 --- a/ui/console.c +++ b/ui/console.c @@ -1410,6 +1410,8 @@ void register_displaychangelistener(DisplayChangeListener *dcl) static DisplaySurface *dummy; QemuConsole *con; + assert(!dcl->ds); + if (dcl->ops->dpy_gl_ctx_create) { /* display has opengl support */ assert(dcl->con); @@ -1538,6 +1540,8 @@ void 
dpy_gfx_replace_surface(QemuConsole *con, DisplaySurface *old_surface = con->surface; DisplayChangeListener *dcl; + assert(old_surface != surface); + con->surface = surface; QLIST_FOREACH(dcl, &s->listeners, next) { if (con != (dcl->con ? dcl->con : active_console)) { @@ -1576,17 +1580,22 @@ bool dpy_gfx_check_format(QemuConsole *con, } /* - * Safe DPY refresh for TCG guests. This runs when the TCG vCPUs are - * quiescent so we can avoid races between dirty page tracking for - * direct frame-buffer access by the guest. + * Safe DPY refresh for TCG guests. We use the exclusive mechanism to + * ensure the TCG vCPUs are quiescent so we can avoid races between + * dirty page tracking for direct frame-buffer access by the guest. * * This is a temporary stopgap until we've fixed the dirty tracking * races in display adapters. */ -static void do_safe_dpy_refresh(CPUState *cpu, run_on_cpu_data opaque) +static void do_safe_dpy_refresh(DisplayChangeListener *dcl) { - DisplayChangeListener *dcl = opaque.host_ptr; + qemu_mutex_unlock_iothread(); + start_exclusive(); + qemu_mutex_lock_iothread(); dcl->ops->dpy_refresh(dcl); + qemu_mutex_unlock_iothread(); + end_exclusive(); + qemu_mutex_lock_iothread(); } static void dpy_refresh(DisplayState *s) @@ -1596,8 +1605,7 @@ static void dpy_refresh(DisplayState *s) QLIST_FOREACH(dcl, &s->listeners, next) { if (dcl->ops->dpy_refresh) { if (tcg_enabled()) { - async_safe_run_on_cpu(first_cpu, do_safe_dpy_refresh, - RUN_ON_CPU_HOST_PTR(dcl)); + do_safe_dpy_refresh(dcl); } else { dcl->ops->dpy_refresh(dcl); } diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c index 584dd1b04d..b7b6b2e3cc 100644 --- a/ui/egl-helpers.c +++ b/ui/egl-helpers.c @@ -192,6 +192,56 @@ EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win) /* ---------------------------------------------------------------------- */ +/* + * Taken from glamor_egl.h from the Xorg xserver, which is MIT licensed + * + * Create an EGLDisplay from a native display type. 
This is a little quirky + * for a few reasons. + * + * 1: GetPlatformDisplayEXT and GetPlatformDisplay are the API you want to + * use, but have different function signatures in the third argument; this + * happens not to matter for us, at the moment, but it means epoxy won't alias + * them together. + * + * 2: epoxy 1.3 and earlier don't understand EGL client extensions, which + * means you can't call "eglGetPlatformDisplayEXT" directly, as the resolver + * will crash. + * + * 3: You can't tell whether you have EGL 1.5 at this point, because + * eglQueryString(EGL_VERSION) is a property of the display, which we don't + * have yet. So you have to query for extensions no matter what. Fortunately + * epoxy_has_egl_extension _does_ let you query for client extensions, so + * we don't have to write our own extension string parsing. + * + * 4. There is no EGL_KHR_platform_base to complement the EXT one, thus one + * needs to know EGL 1.5 is supported in order to use the eglGetPlatformDisplay + * function pointer. + * We can workaround this (circular dependency) by probing for the EGL 1.5 + * platform extensions (EGL_KHR_platform_gbm and friends) yet it doesn't seem + * like mesa will be able to advertise these (even though it can do EGL 1.5). 
+ */ +static EGLDisplay qemu_egl_get_display(void *native) +{ + EGLDisplay dpy = EGL_NO_DISPLAY; + +#ifdef EGL_MESA_platform_gbm + /* In practise any EGL 1.5 implementation would support the EXT extension */ + if (epoxy_has_egl_extension(NULL, "EGL_EXT_platform_base")) { + PFNEGLGETPLATFORMDISPLAYEXTPROC getPlatformDisplayEXT = + (void *) eglGetProcAddress("eglGetPlatformDisplayEXT"); + if (getPlatformDisplayEXT) { + dpy = getPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, native, NULL); + } + } +#endif + + if (dpy == EGL_NO_DISPLAY) { + /* fallback */ + dpy = eglGetDisplay(native); + } + return dpy; +} + int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug) { static const EGLint conf_att_gl[] = { @@ -222,12 +272,8 @@ int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug) setenv("LIBGL_DEBUG", "verbose", true); } - egl_dbg("eglGetDisplay (dpy %p) ...\n", dpy); -#ifdef EGL_MESA_platform_gbm - qemu_egl_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, dpy, NULL); -#else - qemu_egl_display = eglGetDisplay(dpy); -#endif + egl_dbg("qemu_egl_get_display (dpy %p) ...\n", dpy); + qemu_egl_display = qemu_egl_get_display(dpy); if (qemu_egl_display == EGL_NO_DISPLAY) { error_report("egl: eglGetDisplay failed"); return -1; diff --git a/ui/input-linux.c b/ui/input-linux.c index ac31f47719..dc0613ca1f 100644 --- a/ui/input-linux.c +++ b/ui/input-linux.c @@ -169,6 +169,8 @@ struct InputLinux { bool has_abs_x; int num_keys; int num_btns; + struct input_event event; + int read_offset; QTAILQ_ENTRY(InputLinux) next; }; @@ -327,25 +329,30 @@ static void input_linux_handle_mouse(InputLinux *il, struct input_event *event) static void input_linux_event(void *opaque) { InputLinux *il = opaque; - struct input_event event; int rc; + int read_size; + uint8_t *p = (uint8_t *)&il->event; for (;;) { - rc = read(il->fd, &event, sizeof(event)); - if (rc != sizeof(event)) { + read_size = sizeof(il->event) - il->read_offset; + rc = read(il->fd, 
&p[il->read_offset], read_size); + if (rc != read_size) { if (rc < 0 && errno != EAGAIN) { fprintf(stderr, "%s: read: %s\n", __func__, strerror(errno)); qemu_set_fd_handler(il->fd, NULL, NULL, NULL); close(il->fd); + } else if (rc > 0) { + il->read_offset += rc; } break; } + il->read_offset = 0; if (il->num_keys) { - input_linux_handle_keyboard(il, &event); + input_linux_handle_keyboard(il, &il->event); } if (il->has_rel_x && il->num_btns) { - input_linux_handle_mouse(il, &event); + input_linux_handle_mouse(il, &il->event); } } } @@ -129,10 +129,13 @@ static void vnc_init_basic_info(SocketAddress *addr, info->family = NETWORK_ADDRESS_FAMILY_UNIX; break; - default: - error_setg(errp, "Unsupported socket kind %d", - addr->type); + case SOCKET_ADDRESS_KIND_VSOCK: + case SOCKET_ADDRESS_KIND_FD: + error_setg(errp, "Unsupported socket address type %s", + SocketAddressKind_lookup[addr->type]); break; + default: + abort(); } return; @@ -411,10 +414,13 @@ VncInfo *qmp_query_vnc(Error **errp) info->family = NETWORK_ADDRESS_FAMILY_UNIX; break; - default: - error_setg(errp, "Unsupported socket kind %d", - addr->type); + case SOCKET_ADDRESS_KIND_VSOCK: + case SOCKET_ADDRESS_KIND_FD: + error_setg(errp, "Unsupported socket address type %s", + SocketAddressKind_lookup[addr->type]); goto out_error; + default: + abort(); } info->has_host = true; @@ -3401,6 +3407,7 @@ vnc_display_create_creds(bool x509, static int vnc_display_get_address(const char *addrstr, bool websocket, + bool reverse, int displaynum, int to, bool has_ipv4, @@ -3480,21 +3487,22 @@ static int vnc_display_get_address(const char *addrstr, inet->port = g_strdup(port); } } else { + int offset = reverse ? 
0 : 5900; if (parse_uint_full(port, &baseport, 10) < 0) { error_setg(errp, "can't convert to a number: %s", port); goto cleanup; } if (baseport > 65535 || - baseport + 5900 > 65535) { + baseport + offset > 65535) { error_setg(errp, "port %s out of range", port); goto cleanup; } inet->port = g_strdup_printf( - "%d", (int)baseport + 5900); + "%d", (int)baseport + offset); if (to) { inet->has_to = true; - inet->to = to + 5900; + inet->to = to + offset; } } @@ -3516,6 +3524,7 @@ static int vnc_display_get_address(const char *addrstr, } static int vnc_display_get_addresses(QemuOpts *opts, + bool reverse, SocketAddress ***retsaddr, size_t *retnsaddr, SocketAddress ***retwsaddr, @@ -3555,7 +3564,7 @@ static int vnc_display_get_addresses(QemuOpts *opts, qemu_opt_iter_init(&addriter, opts, "vnc"); while ((addr = qemu_opt_iter_next(&addriter)) != NULL) { int rv; - rv = vnc_display_get_address(addr, false, 0, to, + rv = vnc_display_get_address(addr, false, reverse, 0, to, has_ipv4, has_ipv6, ipv4, ipv6, &saddr, errp); @@ -3580,7 +3589,7 @@ static int vnc_display_get_addresses(QemuOpts *opts, qemu_opt_iter_init(&addriter, opts, "websocket"); while ((addr = qemu_opt_iter_next(&addriter)) != NULL) { - if (vnc_display_get_address(addr, true, displaynum, to, + if (vnc_display_get_address(addr, true, reverse, displaynum, to, has_ipv4, has_ipv6, ipv4, ipv6, &wsaddr, errp) < 0) { @@ -3639,6 +3648,7 @@ static int vnc_display_connect(VncDisplay *vd, error_setg(errp, "Expected a single address in reverse mode"); return -1; } + /* TODO SOCKET_ADDRESS_KIND_FD when fd has AF_UNIX */ vd->is_unix = saddr[0]->type == SOCKET_ADDRESS_KIND_UNIX; sioc = qio_channel_socket_new(); qio_channel_set_name(QIO_CHANNEL(sioc), "vnc-reverse"); @@ -3777,15 +3787,12 @@ void vnc_display_open(const char *id, Error **errp) return; } - if (vnc_display_get_addresses(opts, &saddr, &nsaddr, + reverse = qemu_opt_get_bool(opts, "reverse", false); + if (vnc_display_get_addresses(opts, reverse, &saddr, &nsaddr, 
&wsaddr, &nwsaddr, errp) < 0) { goto fail; } - if (saddr == NULL) { - return; - } - password = qemu_opt_get_bool(opts, "password", false); if (password) { if (fips_get_state()) { @@ -3803,7 +3810,6 @@ void vnc_display_open(const char *id, Error **errp) } } - reverse = qemu_opt_get_bool(opts, "reverse", false); lock_key_sync = qemu_opt_get_bool(opts, "lock-key-sync", true); key_delay_ms = qemu_opt_get_number(opts, "key-delay-ms", 1); sasl = qemu_opt_get_bool(opts, "sasl", false); @@ -3971,6 +3977,10 @@ void vnc_display_open(const char *id, Error **errp) register_displaychangelistener(&vd->dcl); } + if (saddr == NULL) { + goto cleanup; + } + if (reverse) { if (vnc_display_connect(vd, saddr, nsaddr, wsaddr, nwsaddr, errp) < 0) { goto fail; diff --git a/user-exec.c b/user-exec.c index 6db075884d..a8f95fa1e1 100644 --- a/user-exec.c +++ b/user-exec.c @@ -57,10 +57,23 @@ static void cpu_exit_tb_from_sighandler(CPUState *cpu, sigset_t *old_set) static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, int is_write, sigset_t *old_set) { - CPUState *cpu; + CPUState *cpu = current_cpu; CPUClass *cc; int ret; + /* For synchronous signals we expect to be coming from the vCPU + * thread (so current_cpu should be valid) and either from running + * code or during translation which can fault as we cross pages. + * + * If neither is true then something has gone wrong and we should + * abort rather than try and restart the vCPU execution. + */ + if (!cpu || !cpu->running) { + printf("qemu:%s received signal outside vCPU context @ pc=0x%" + PRIxPTR "\n", __func__, pc); + abort(); + } + #if defined(DEBUG_SIGNAL) printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n", pc, address, is_write, *(unsigned long *)old_set); @@ -83,7 +96,7 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, * currently executing TB was modified and must be exited * immediately. 
*/ - cpu_exit_tb_from_sighandler(current_cpu, old_set); + cpu_exit_tb_from_sighandler(cpu, old_set); g_assert_not_reached(); default: g_assert_not_reached(); @@ -94,7 +107,6 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, are still valid segv ones */ address = h2g_nocheck(address); - cpu = current_cpu; cc = CPU_GET_CLASS(cpu); /* see if it is an MMU fault */ g_assert(cc->handle_mmu_fault); diff --git a/util/async.c b/util/async.c index 663e297e1f..355af73ee7 100644 --- a/util/async.c +++ b/util/async.c @@ -453,6 +453,11 @@ void aio_co_wake(struct Coroutine *co) smp_read_barrier_depends(); ctx = atomic_read(&co->ctx); + aio_co_enter(ctx, co); +} + +void aio_co_enter(AioContext *ctx, struct Coroutine *co) +{ if (ctx != qemu_get_current_aio_context()) { aio_co_schedule(ctx, co); return; @@ -464,7 +469,7 @@ void aio_co_wake(struct Coroutine *co) QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); } else { aio_context_acquire(ctx); - qemu_coroutine_enter(co); + qemu_aio_coroutine_enter(ctx, co); aio_context_release(ctx); } } diff --git a/util/bitmap.c b/util/bitmap.c index c1a84ca5e3..efced9a7d8 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -287,6 +287,17 @@ bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) return dirty != 0; } +void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src, + long nr) +{ + while (nr > 0) { + *dst = atomic_xchg(src, 0); + dst++; + src++; + nr -= BITS_PER_LONG; + } +} + #define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) /** diff --git a/util/error.c b/util/error.c index 9c40b1f458..020b86b9f0 100644 --- a/util/error.c +++ b/util/error.c @@ -134,6 +134,7 @@ void error_vprepend(Error **errp, const char *fmt, va_list ap) newmsg = g_string_new(NULL); g_string_vprintf(newmsg, fmt, ap); g_string_append(newmsg, (*errp)->msg); + g_free((*errp)->msg); (*errp)->msg = g_string_free(newmsg, 0); } diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c index 
7e40252ade..acdbe3b483 100644 --- a/util/event_notifier-posix.c +++ b/util/event_notifier-posix.c @@ -81,8 +81,10 @@ void event_notifier_cleanup(EventNotifier *e) { if (e->rfd != e->wfd) { close(e->rfd); + e->rfd = -1; } close(e->wfd); + e->wfd = -1; } int event_notifier_get_fd(const EventNotifier *e) diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c index 519fb59123..62c53b0a99 100644 --- a/util/event_notifier-win32.c +++ b/util/event_notifier-win32.c @@ -25,6 +25,7 @@ int event_notifier_init(EventNotifier *e, int active) void event_notifier_cleanup(EventNotifier *e) { CloseHandle(e->event); + e->event = NULL; } HANDLE event_notifier_get_handle(EventNotifier *e) diff --git a/util/keyval.c b/util/keyval.c index f646b36821..93d5db6b59 100644 --- a/util/keyval.c +++ b/util/keyval.c @@ -21,22 +21,36 @@ * * Semantics defined by reduction to JSON: * - * key-vals is a tree of objects and arrays rooted at object R - * where for each key-val = key-fragment . ... = val in key-vals - * R op key-fragment op ... = val' - * where (left-associative) op is - * array subscript L[key-fragment] for numeric key-fragment - * member reference L.key-fragment otherwise - * val' is val with ',,' replaced by ',' - * and only R may be empty. + * key-vals specifies a JSON object, i.e. a tree whose root is an + * object, inner nodes other than the root are objects or arrays, + * and leaves are strings. * - * Duplicate keys are permitted; all but the last one are ignored. + * Each key-val = key-fragment '.' ... '=' val specifies a path from + * root to a leaf (left of '='), and the leaf's value (right of + * '='). * - * The equations must have a solution. Counter-example: a.b=1,a=2 - * doesn't have one, because R.a must be an object to satisfy a.b=1 - * and a string to satisfy a=2. + * A path from the root is defined recursively: + * L '.' 
key-fragment is a child of the node denoted by path L + * key-fragment is a child of the tree root + * If key-fragment is numeric, the parent is an array and the child + * is its key-fragment-th member, counting from zero. + * Else, the parent is an object, and the child is its member named + * key-fragment. * - * Key-fragments must be valid QAPI names or consist only of digits. + * This constrains inner nodes to be either array or object. The + * constraints must be satisfiable. Counter-example: a.b=1,a=2 is + * not, because root.a must be an object to satisfy a.b=1 and a + * string to satisfy a=2. + * + * Array subscripts can occur in any order, but the set of + * subscripts must not have gaps. For instance, a.1=v is not okay, + * because root.a[0] is missing. + * + * If multiple key-val denote the same leaf, the last one determines + * the value. + * + * Key-fragments must be valid QAPI names or consist only of decimal + * digits. * * The length of any key-fragment must be between 1 and 127. * @@ -47,6 +61,16 @@ * "key absent" already means "optional object/array absent", which * isn't the same as "empty object/array present". * + * Design flaw: scalar values can only be strings; there is no way to + * denote numbers, true, false or null. The special QObject input + * visitor returned by qobject_input_visitor_new_keyval() mostly hides + * this by automatically converting strings to the type the visitor + * expects. Breaks down for alternate types and type 'any', where the + * visitor's expectation isn't clear. Code visiting such types needs + * to do the conversion itself, but only when using this keyval + * visitor. Awkward. Alternate types without a string member don't + * work at all. + * * Additional syntax for use with an implied key: * * key-vals-ik = val-no-key [ ',' key-vals ] @@ -64,8 +88,8 @@ /* * Convert @key to a list index. - * Convert all leading digits to a (non-negative) number, capped at - * INT_MAX. 
+ * Convert all leading decimal digits to a (non-negative) number, + * capped at INT_MAX. * If @end is non-null, assign a pointer to the first character after * the number to *@end. * Else, fail if any characters follow. @@ -337,7 +361,8 @@ static QObject *keyval_listify(QDict *cur, GSList *key_of_cur, Error **errp) } /* - * Make a list from @elt[], reporting any missing elements. + * Make a list from @elt[], reporting the first missing element, + * if any. * If we dropped an index >= nelt in the previous loop, this loop * will run into the sentinel and report index @nelt missing. */ diff --git a/util/main-loop.c b/util/main-loop.c index 4534c89308..19cad6b8b6 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -218,9 +218,12 @@ static void glib_pollfds_poll(void) static int os_host_main_loop_wait(int64_t timeout) { + GMainContext *context = g_main_context_default(); int ret; static int spin_counter; + g_main_context_acquire(context); + glib_pollfds_fill(&timeout); /* If the I/O thread is very busy or we are incorrectly busy waiting in @@ -256,6 +259,9 @@ static int os_host_main_loop_wait(int64_t timeout) } glib_pollfds_poll(); + + g_main_context_release(context); + return ret; } #else @@ -412,12 +418,15 @@ static int os_host_main_loop_wait(int64_t timeout) fd_set rfds, wfds, xfds; int nfds; + g_main_context_acquire(context); + /* XXX: need to suppress polling by better using win32 events */ ret = 0; for (pe = first_polling_entry; pe != NULL; pe = pe->next) { ret |= pe->func(pe->opaque); } if (ret != 0) { + g_main_context_release(context); return ret; } @@ -472,6 +481,8 @@ static int os_host_main_loop_wait(int64_t timeout) g_main_context_dispatch(context); } + g_main_context_release(context); + return select_ret || g_poll_ret; } #endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 3fe6089c3e..4d9189e9ef 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -55,7 +55,7 @@ #include "qemu/error-report.h" #endif -#define 
MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16)) +#define MAX_MEM_PREALLOC_THREAD_COUNT 16 struct MemsetThread { char *addr; @@ -381,6 +381,18 @@ static void *do_touch_pages(void *arg) return NULL; } +static inline int get_memset_num_threads(int smp_cpus) +{ + long host_procs = sysconf(_SC_NPROCESSORS_ONLN); + int ret = 1; + + if (host_procs > 0) { + ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus); + } + /* In case sysconf() fails, we fall back to single threaded */ + return ret; +} + static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, int smp_cpus) { @@ -389,7 +401,7 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages, int i = 0; memset_thread_failed = false; - memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT); + memset_num_threads = get_memset_num_threads(smp_cpus); memset_thread = g_new0(MemsetThread, memset_num_threads); numpages_per_thread = (numpages / memset_num_threads); size_per_thread = (hpagesize * numpages_per_thread); diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 72412e5649..486af9a622 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -102,12 +102,12 @@ static void coroutine_delete(Coroutine *co) qemu_coroutine_delete(co); } -void qemu_coroutine_enter(Coroutine *co) +void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) { Coroutine *self = qemu_coroutine_self(); CoroutineAction ret; - trace_qemu_coroutine_enter(self, co, co->entry_arg); + trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg); if (co->caller) { fprintf(stderr, "Co-routine re-entered recursively\n"); @@ -115,7 +115,7 @@ void qemu_coroutine_enter(Coroutine *co) } co->caller = self; - co->ctx = qemu_get_current_aio_context(); + co->ctx = ctx; /* Store co->ctx before anything that stores co. Matches * barrier in aio_co_wake and qemu_co_mutex_wake. 
@@ -139,6 +139,11 @@ void qemu_coroutine_enter(Coroutine *co) } } +void qemu_coroutine_enter(Coroutine *co) +{ + qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co); +} + void qemu_coroutine_enter_if_inactive(Coroutine *co) { if (!qemu_coroutine_entered(co)) { diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 7c120c45ce..8188d9a8d7 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -25,6 +25,7 @@ #include "qapi/error.h" #include "qemu/sockets.h" #include "qemu/main-loop.h" +#include "qapi/clone-visitor.h" #include "qapi/qobject-input-visitor.h" #include "qapi/qobject-output-visitor.h" #include "qapi-visit.h" @@ -426,8 +427,9 @@ static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr, * function succeeds, callback will be called when the connection * completes, with the file descriptor on success, or -1 on error. */ -int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +int inet_connect_saddr(InetSocketAddress *saddr, + NonBlockingConnectHandler *callback, void *opaque, + Error **errp) { Error *local_err = NULL; struct addrinfo *res, *e; @@ -658,7 +660,7 @@ int inet_connect(const char *str, Error **errp) addr = inet_parse(str, errp); if (addr != NULL) { - sock = inet_connect_saddr(addr, errp, NULL, NULL); + sock = inet_connect_saddr(addr, NULL, NULL, errp); qapi_free_InetSocketAddress(addr); } return sock; @@ -726,9 +728,10 @@ static int vsock_connect_addr(const struct sockaddr_vm *svm, bool *in_progress, return sock; } -static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp, +static int vsock_connect_saddr(VsockSocketAddress *vaddr, NonBlockingConnectHandler *callback, - void *opaque) + void *opaque, + Error **errp) { struct sockaddr_vm svm; int sock = -1; @@ -817,9 +820,9 @@ static void vsock_unsupported(Error **errp) error_setg(errp, "socket family AF_VSOCK unsupported"); } -static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error 
**errp, +static int vsock_connect_saddr(VsockSocketAddress *vaddr, NonBlockingConnectHandler *callback, - void *opaque) + void *opaque, Error **errp) { vsock_unsupported(errp); return -1; @@ -909,8 +912,9 @@ err: return -1; } -static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +static int unix_connect_saddr(UnixSocketAddress *saddr, + NonBlockingConnectHandler *callback, void *opaque, + Error **errp) { struct sockaddr_un un; ConnectState *connect_state = NULL; @@ -977,8 +981,9 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, return -1; } -static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +static int unix_connect_saddr(UnixSocketAddress *saddr, + NonBlockingConnectHandler *callback, void *opaque, + Error **errp) { error_setg(errp, "unix sockets are not available on windows"); errno = ENOTSUP; @@ -1024,7 +1029,7 @@ int unix_connect(const char *path, Error **errp) saddr = g_new0(UnixSocketAddress, 1); saddr->path = g_strdup(path); - sock = unix_connect_saddr(saddr, errp, NULL, NULL); + sock = unix_connect_saddr(saddr, NULL, NULL, errp); qapi_free_UnixSocketAddress(saddr); return sock; } @@ -1073,18 +1078,18 @@ fail: return NULL; } -int socket_connect(SocketAddress *addr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +int socket_connect(SocketAddress *addr, NonBlockingConnectHandler *callback, + void *opaque, Error **errp) { int fd; switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - fd = inet_connect_saddr(addr->u.inet.data, errp, callback, opaque); + fd = inet_connect_saddr(addr->u.inet.data, callback, opaque, errp); break; case SOCKET_ADDRESS_KIND_UNIX: - fd = unix_connect_saddr(addr->u.q_unix.data, errp, callback, opaque); + fd = unix_connect_saddr(addr->u.q_unix.data, callback, opaque, errp); break; case SOCKET_ADDRESS_KIND_FD: @@ -1096,7 +1101,7 @@ int socket_connect(SocketAddress *addr, 
Error **errp, break; case SOCKET_ADDRESS_KIND_VSOCK: - fd = vsock_connect_saddr(addr->u.vsock.data, errp, callback, opaque); + fd = vsock_connect_saddr(addr->u.vsock.data, callback, opaque, errp); break; default: @@ -1154,6 +1159,10 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp) { int fd; + /* + * TODO SOCKET_ADDRESS_KIND_FD when fd is AF_INET or AF_INET6 + * (although other address families can do SOCK_DGRAM, too) + */ switch (remote->type) { case SOCKET_ADDRESS_KIND_INET: fd = inet_dgram_saddr(remote->u.inet.data, @@ -1307,19 +1316,14 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp) { char *buf; InetSocketAddress *inet; - char host_port[INET6_ADDRSTRLEN + 5 + 4]; switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: inet = addr->u.inet.data; if (strchr(inet->host, ':') == NULL) { - snprintf(host_port, sizeof(host_port), "%s:%s", inet->host, - inet->port); - buf = g_strdup(host_port); + buf = g_strdup_printf("%s:%s", inet->host, inet->port); } else { - snprintf(host_port, sizeof(host_port), "[%s]:%s", inet->host, - inet->port); - buf = g_strdup(host_port); + buf = g_strdup_printf("[%s]:%s", inet->host, inet->port); } break; @@ -1338,9 +1342,38 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp) break; default: - error_setg(errp, "socket family %d unsupported", - addr->type); - return NULL; + abort(); } return buf; } + +SocketAddress *socket_address_crumple(SocketAddressFlat *addr_flat) +{ + SocketAddress *addr = g_new(SocketAddress, 1); + + switch (addr_flat->type) { + case SOCKET_ADDRESS_FLAT_TYPE_INET: + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet.data = QAPI_CLONE(InetSocketAddress, + &addr_flat->u.inet); + break; + case SOCKET_ADDRESS_FLAT_TYPE_UNIX: + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix.data = QAPI_CLONE(UnixSocketAddress, + &addr_flat->u.q_unix); + break; + case SOCKET_ADDRESS_FLAT_TYPE_VSOCK: + addr->type = SOCKET_ADDRESS_KIND_VSOCK; + 
addr->u.vsock.data = QAPI_CLONE(VsockSocketAddress, + &addr_flat->u.vsock); + break; + case SOCKET_ADDRESS_FLAT_TYPE_FD: + addr->type = SOCKET_ADDRESS_KIND_FD; + addr->u.fd.data = QAPI_CLONE(String, &addr_flat->u.fd); + break; + default: + abort(); + } + + return addr; +} diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index 29c3e4dd85..59befd5202 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -10,6 +10,11 @@ * See the COPYING file in the top-level directory. * */ + +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0600 +#endif + #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/thread.h" @@ -39,44 +44,30 @@ static void error_exit(int err, const char *msg) void qemu_mutex_init(QemuMutex *mutex) { - mutex->owner = 0; - InitializeCriticalSection(&mutex->lock); + InitializeSRWLock(&mutex->lock); } void qemu_mutex_destroy(QemuMutex *mutex) { - assert(mutex->owner == 0); - DeleteCriticalSection(&mutex->lock); + InitializeSRWLock(&mutex->lock); } void qemu_mutex_lock(QemuMutex *mutex) { - EnterCriticalSection(&mutex->lock); - - /* Win32 CRITICAL_SECTIONs are recursive. Assert that we're not - * using them as such. 
- */ - assert(mutex->owner == 0); - mutex->owner = GetCurrentThreadId(); + AcquireSRWLockExclusive(&mutex->lock); } int qemu_mutex_trylock(QemuMutex *mutex) { int owned; - owned = TryEnterCriticalSection(&mutex->lock); - if (owned) { - assert(mutex->owner == 0); - mutex->owner = GetCurrentThreadId(); - } + owned = TryAcquireSRWLockExclusive(&mutex->lock); return !owned; } void qemu_mutex_unlock(QemuMutex *mutex) { - assert(mutex->owner == GetCurrentThreadId()); - mutex->owner = 0; - LeaveCriticalSection(&mutex->lock); + ReleaseSRWLockExclusive(&mutex->lock); } void qemu_rec_mutex_init(QemuRecMutex *mutex) @@ -107,124 +98,27 @@ void qemu_rec_mutex_unlock(QemuRecMutex *mutex) void qemu_cond_init(QemuCond *cond) { memset(cond, 0, sizeof(*cond)); - - cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); - if (!cond->sema) { - error_exit(GetLastError(), __func__); - } - cond->continue_event = CreateEvent(NULL, /* security */ - FALSE, /* auto-reset */ - FALSE, /* not signaled */ - NULL); /* name */ - if (!cond->continue_event) { - error_exit(GetLastError(), __func__); - } + InitializeConditionVariable(&cond->var); } void qemu_cond_destroy(QemuCond *cond) { - BOOL result; - result = CloseHandle(cond->continue_event); - if (!result) { - error_exit(GetLastError(), __func__); - } - cond->continue_event = 0; - result = CloseHandle(cond->sema); - if (!result) { - error_exit(GetLastError(), __func__); - } - cond->sema = 0; + InitializeConditionVariable(&cond->var); } void qemu_cond_signal(QemuCond *cond) { - DWORD result; - - /* - * Signal only when there are waiters. cond->waiters is - * incremented by pthread_cond_wait under the external lock, - * so we are safe about that. - */ - if (cond->waiters == 0) { - return; - } - - /* - * Waiting threads decrement it outside the external lock, but - * only if another thread is executing pthread_cond_broadcast and - * has the mutex. So, it also cannot be decremented concurrently - * with this particular access. 
- */ - cond->target = cond->waiters - 1; - result = SignalObjectAndWait(cond->sema, cond->continue_event, - INFINITE, FALSE); - if (result == WAIT_ABANDONED || result == WAIT_FAILED) { - error_exit(GetLastError(), __func__); - } + WakeConditionVariable(&cond->var); } void qemu_cond_broadcast(QemuCond *cond) { - BOOLEAN result; - /* - * As in pthread_cond_signal, access to cond->waiters and - * cond->target is locked via the external mutex. - */ - if (cond->waiters == 0) { - return; - } - - cond->target = 0; - result = ReleaseSemaphore(cond->sema, cond->waiters, NULL); - if (!result) { - error_exit(GetLastError(), __func__); - } - - /* - * At this point all waiters continue. Each one takes its - * slice of the semaphore. Now it's our turn to wait: Since - * the external mutex is held, no thread can leave cond_wait, - * yet. For this reason, we can be sure that no thread gets - * a chance to eat *more* than one slice. OTOH, it means - * that the last waiter must send us a wake-up. - */ - WaitForSingleObject(cond->continue_event, INFINITE); + WakeAllConditionVariable(&cond->var); } void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) { - /* - * This access is protected under the mutex. - */ - cond->waiters++; - - /* - * Unlock external mutex and wait for signal. - * NOTE: we've held mutex locked long enough to increment - * waiters count above, so there's no problem with - * leaving mutex unlocked before we wait on semaphore. - */ - qemu_mutex_unlock(mutex); - WaitForSingleObject(cond->sema, INFINITE); - - /* Now waiters must rendez-vous with the signaling thread and - * let it continue. For cond_broadcast this has heavy contention - * and triggers thundering herd. So goes life. - * - * Decrease waiters count. The mutex is not taken, so we have - * to do this atomically. - * - * All waiters contend for the mutex at the end of this function - * until the signaling thread relinquishes it. 
To ensure - * each waiter consumes exactly one slice of the semaphore, - * the signaling thread stops until it is told by the last - * waiter that it can go on. - */ - if (InterlockedDecrement(&cond->waiters) == cond->target) { - SetEvent(cond->continue_event); - } - - qemu_mutex_lock(mutex); + SleepConditionVariableSRW(&cond->var, &mutex->lock, INFINITE, 0); } void qemu_sem_init(QemuSemaphore *sem, int init) diff --git a/util/throttle.c b/util/throttle.c index 3817d9b904..3570ed25fc 100644 --- a/util/throttle.c +++ b/util/throttle.c @@ -380,6 +380,14 @@ static void throttle_fix_bucket(LeakyBucket *bkt) } } +/* undo internal bucket parameter changes (see throttle_fix_bucket()) */ +static void throttle_unfix_bucket(LeakyBucket *bkt) +{ + if (bkt->max < bkt->avg) { + bkt->max = 0; + } +} + /* take care of canceling a timer */ static void throttle_cancel_timer(QEMUTimer *timer) { @@ -420,7 +428,13 @@ void throttle_config(ThrottleState *ts, */ void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) { + int i; + *cfg = ts->cfg; + + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_unfix_bucket(&cfg->buckets[i]); + } } diff --git a/util/trace-events b/util/trace-events index ac27d94a97..b44ef4f895 100644 --- a/util/trace-events +++ b/util/trace-events @@ -22,7 +22,7 @@ buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from % buffer_free(const char *buf, size_t len) "%s: capacity %zd" # util/qemu-coroutine.c -qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p" +qemu_aio_coroutine_enter(void *ctx, void *from, void *to, void *opaque) "ctx %p from %p to %p opaque %p" qemu_coroutine_yield(void *from, void *to) "from %p to %p" qemu_coroutine_terminate(void *co) "self %p" |