326 files changed, 9446 insertions, 4125 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 420505e995..355982b623 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1232,6 +1232,15 @@ M: Samuel Thibault <samuel.thibault@ens-lyon.org>
 S: Maintained
 F: backends/baum.c
 
+Command line option argument parsing
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qemu/option.h
+F: tests/test-keyval.c
+F: tests/test-qemu-opts.c
+F: util/keyval.c
+F: util/qemu-option.c
+
 Coverity model
 M: Markus Armbruster <armbru@redhat.com>
 S: Supported
@@ -1366,7 +1375,9 @@ X: include/qapi/qmp/
 F: include/qapi/qmp/dispatch.h
 F: tests/qapi-schema/
 F: tests/test-*-visitor.c
+F: tests/test-qapi-*.c
 F: tests/test-qmp-*.c
+F: tests/test-visitor-serialization.c
 F: scripts/qapi*
 F: docs/qapi*
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
@@ -1807,8 +1818,8 @@ S: Supported
 F: tests/image-fuzzer/
 
 Replication
-M: Wen Congyang <wency@cn.fujitsu.com>
-M: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
+M: Wen Congyang <wencongyang2@huawei.com>
+M: Xie Changlong <xiechanglong.d@gmail.com>
 S: Supported
 F: replication*
 F: block/replication.c
diff --git a/Makefile b/Makefile
index f4f90dfad6..31d41a7eae 100644
--- a/Makefile
+++ b/Makefile
@@ -346,7 +346,7 @@ dtc/%:
 	mkdir -p $@
 
 $(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(chardev-obj-y) \
-	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY)) $(trace-obj-y)
+	$(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
 
 ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
 # Only keep -O and -g cflags
@@ -366,11 +366,11 @@ Makefile: $(version-obj-y)
 # Build libraries
 
 libqemustub.a: $(stub-obj-y)
-libqemuutil.a: $(util-obj-y)
+libqemuutil.a: $(util-obj-y) $(trace-obj-y)
 
 ######################################################################
 
-COMMON_LDADDS = $(trace-obj-y) libqemuutil.a libqemustub.a
+COMMON_LDADDS = libqemuutil.a libqemustub.a
 
 qemu-img.o: qemu-img-cmds.h
 
@@ -392,7 +392,6 @@ qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated
 gen-out-type = $(subst .,-,$(suffix $@))
 
 qapi-py = $(SRC_PATH)/scripts/qapi.py $(SRC_PATH)/scripts/ordereddict.py
-qapi-py += $(SRC_PATH)/scripts/qapi2texi.py
 
 qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
 $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
@@ -701,10 +700,12 @@ qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxt
 qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
 	$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@")
 
-docs/qemu-qmp-qapi.texi: $(qapi-modules) $(qapi-py)
+docs/qemu-qmp-qapi.texi docs/qemu-ga-qapi.texi: $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
+
+docs/qemu-qmp-qapi.texi: $(qapi-modules)
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 
-docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json $(qapi-py)
+docs/qemu-ga-qapi.texi: $(SRC_PATH)/qga/qapi-schema.json
 	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
 
 qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
diff --git a/Makefile.target b/Makefile.target
index a535980110..465a633367 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -182,8 +182,7 @@ dummy := $(call unnest-vars,.., \
                qom-obj-y \
                io-obj-y \
                common-obj-y \
-               common-obj-m \
-               trace-obj-y)
+               common-obj-m)
 target-obj-y := $(target-obj-y-save)
 all-obj-y += $(common-obj-y)
 all-obj-y += $(target-obj-y)
@@ -195,7 +194,7 @@ all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)
 
 $(QEMU_PROG_BUILD): config-devices.mak
 
-COMMON_LDADDS = $(trace-obj-y) ../libqemuutil.a ../libqemustub.a
+COMMON_LDADDS = ../libqemuutil.a ../libqemustub.a
 
 # build either PROG or PROGW
 $(QEMU_PROG_BUILD): $(all-obj-y) $(COMMON_LDADDS)
diff --git a/VERSION b/VERSION
index 04cfc0cb14..12ca1d1c12 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.8.91
+2.9.50
diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c
index 82a068e792..657c0ba2f3 100644
--- a/backends/cryptodev-builtin.c
+++ b/backends/cryptodev-builtin.c
@@ -320,10 +320,12 @@ static int cryptodev_builtin_sym_operation(
 
     sess = builtin->sessions[op_info->session_id];
 
-    ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
-                               op_info->iv_len, errp);
-    if (ret < 0) {
-        return -VIRTIO_CRYPTO_ERR;
+    if (op_info->iv_len > 0) {
+        ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv,
+                                   op_info->iv_len, errp);
+        if (ret < 0) {
+            return -VIRTIO_CRYPTO_ERR;
+        }
     }
 
     if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) {
@@ -359,8 +361,6 @@ static void cryptodev_builtin_cleanup(
         }
     }
 
-    assert(queues == 1);
-
     for (i = 0; i < queues; i++) {
         cc = backend->conf.peers.ccs[i];
         if (cc) {
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 42efb2f28a..fc4ef46d11 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -51,7 +51,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
 #ifndef CONFIG_LINUX
     error_setg(errp, "-mem-path not supported on this host");
 #else
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
         gchar *path;
         backend->force_prealloc = mem_prealloc;
         path = object_get_canonical_path(OBJECT(backend));
@@ -76,7 +76,7 @@ static void set_mem_path(Object *o, const char *str, Error **errp)
     HostMemoryBackend *backend = MEMORY_BACKEND(o);
     HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
 
-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
         error_setg(errp, "cannot change property value");
         return;
     }
@@ -96,7 +96,7 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
     HostMemoryBackend *backend = MEMORY_BACKEND(o);
     HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
 
-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
         error_setg(errp, "cannot change property value");
         return;
     }
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 162c2187d8..4606b73849 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -45,7 +45,7 @@ host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
     Error *local_err = NULL;
     uint64_t value;
 
-    if (memory_region_size(&backend->mr)) {
+    if (host_memory_backend_mr_inited(backend)) {
         error_setg(&local_err, "cannot change property value");
         goto out;
     }
@@ -64,14 +64,6 @@ out:
     error_propagate(errp, local_err);
 }
 
-static uint16List **host_memory_append_node(uint16List **node,
-                                            unsigned long value)
-{
-     *node = g_malloc0(sizeof(**node));
-     (*node)->value = value;
-     return &(*node)->next;
-}
-
 static void
 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
                                    void *opaque, Error **errp)
@@ -82,23 +74,25 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
     unsigned long value;
 
     value = find_first_bit(backend->host_nodes, MAX_NODES);
-
-    node = host_memory_append_node(node, value);
-
     if (value == MAX_NODES) {
-        goto out;
+        return;
     }
 
+    *node = g_malloc0(sizeof(**node));
+    (*node)->value = value;
+    node = &(*node)->next;
+
     do {
         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
         if (value == MAX_NODES) {
             break;
         }
 
-        node = host_memory_append_node(node, value);
+        *node = g_malloc0(sizeof(**node));
+        (*node)->value = value;
+        node = &(*node)->next;
     } while (true);
 
-out:
     visit_type_uint16List(v, name, &host_nodes, errp);
 }
 
@@ -152,7 +146,7 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
         backend->merge = value;
         return;
     }
@@ -178,7 +172,7 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
 
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
         backend->dump = value;
         return;
     }
@@ -214,7 +208,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
         }
     }
 
-    if (!memory_region_size(&backend->mr)) {
+    if (!host_memory_backend_mr_inited(backend)) {
         backend->prealloc = value;
         return;
     }
@@ -243,10 +237,19 @@ static void host_memory_backend_init(Object *obj)
     backend->prealloc = mem_prealloc;
 }
 
+bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
+{
+    /*
+     * NOTE: We forbid zero-length memory backend, so here zero means
+     * "we haven't inited the backend memory region yet".
+     */
+    return memory_region_size(&backend->mr) != 0;
+}
+
 MemoryRegion *
 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
 {
-    return memory_region_size(&backend->mr) ? &backend->mr : NULL;
+    return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
 }
 
 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
diff --git a/block.c b/block.c
index 6e906ec53c..5db266be21 100644
--- a/block.c
+++ b/block.c
@@ -192,6 +192,43 @@ void path_combine(char *dest, int dest_size,
     }
 }
 
+bool bdrv_is_read_only(BlockDriverState *bs)
+{
+    return bs->read_only;
+}
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    /* Do not set read_only if copy_on_read is enabled */
+    if (bs->copy_on_read && read_only) {
+        error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
+                   bdrv_get_device_or_node_name(bs));
+        return -EINVAL;
+    }
+
+    /* Do not clear read_only if it is prohibited */
+    if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
+        error_setg(errp, "Node '%s' is read only",
+                   bdrv_get_device_or_node_name(bs));
+        return -EPERM;
+    }
+
+    return 0;
+}
+
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp)
+{
+    int ret = 0;
+
+    ret = bdrv_can_set_read_only(bs, read_only, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
+    bs->read_only = read_only;
+    return 0;
+}
+
 void bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                   const char *backing,
                                                   char *dest, size_t sz,
@@ -1157,6 +1194,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
     if (file != NULL) {
         filename = blk_bs(file)->filename;
     } else {
+        /*
+         * Caution: while qdict_get_try_str() is fine, getting
+         * non-string types would require more care.  When @options
+         * come from -blockdev or blockdev_add, its members are typed
+         * according to the QAPI schema, but when they come from
+         * -drive, they're all QString.
+         */
         filename = qdict_get_try_str(options, "filename");
     }
 
@@ -1324,6 +1368,13 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
 
+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
     drvname = qdict_get_try_str(*options, "driver");
     if (drvname) {
         drv = bdrv_find_format(drvname);
@@ -1358,6 +1409,7 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     }
 
     /* Find the right block driver */
+    /* See cautionary note on accessing @options above */
     filename = qdict_get_try_str(*options, "filename");
 
     if (!drvname && protocol) {
@@ -1737,6 +1789,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 {
     BlockDriverState *old_bs = child->bs;
 
+    if (old_bs && new_bs) {
+        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+    }
     if (old_bs) {
         if (old_bs->quiesce_counter && child->role->drained_end) {
             child->role->drained_end(child);
@@ -1837,6 +1892,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
     bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
 
     assert(parent_bs->drv);
+    assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs));
     parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role,
                                     perm, shared_perm, &perm, &shared_perm);
 
@@ -1987,6 +2043,13 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
     g_free(bdref_key_dot);
 
+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @parent_options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
     reference = qdict_get_try_str(parent_options, bdref_key);
     if (reference || qdict_haskey(options, "file.filename")) {
         backing_filename[0] = '\0';
@@ -2059,6 +2122,13 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
     qdict_extract_subqdict(options, &image_options, bdref_key_dot);
     g_free(bdref_key_dot);
 
+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from
+     * -blockdev or blockdev_add, its members are typed according to
+     * the QAPI schema, but when they come from -drive, they're all
+     * QString.
+     */
     reference = qdict_get_try_str(options, bdref_key);
     if (!filename && !reference && !qdict_size(image_options)) {
         if (!allow_none) {
@@ -2274,9 +2344,13 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
         goto fail;
     }
 
-    /* Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
-     * FIXME: we're parsing the QDict to avoid having to create a
-     * QemuOpts just for this, but neither option is optimal. */
+    /*
+     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
+     * Caution: getting a boolean member of @options requires care.
+     * When @options come from -blockdev or blockdev_add, members are
+     * typed according to the QAPI schema, but when they come from
+     * -drive, they're all QString.
+     */
     if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
         !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
         flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
@@ -2298,6 +2372,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
     options = qdict_clone_shallow(options);
 
     /* Find the right image format driver */
+    /* See cautionary note on accessing @options above */
     drvname = qdict_get_try_str(options, "driver");
     if (drvname) {
         drv = bdrv_find_format(drvname);
@@ -2309,6 +2384,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
 
     assert(drvname || !(flags & BDRV_O_PROTOCOL));
 
+    /* See cautionary note on accessing @options above */
     backing = qdict_get_try_str(options, "backing");
     if (backing && *backing == '\0') {
         flags |= BDRV_O_NO_BACKING;
@@ -2713,6 +2789,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
     BlockDriver *drv;
     QemuOpts *opts;
     const char *value;
+    bool read_only;
 
     assert(reopen_state != NULL);
     assert(reopen_state->bs->drv != NULL);
@@ -2741,12 +2818,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
         qdict_put(reopen_state->options, "driver", qstring_from_str(value));
     }
 
-    /* if we are to stay read-only, do not allow permission change
-     * to r/w */
-    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
-        reopen_state->flags & BDRV_O_RDWR) {
-        error_setg(errp, "Node '%s' is read only",
-                   bdrv_get_device_or_node_name(reopen_state->bs));
+    /* If we are to stay read-only, do not allow permission change
+     * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
+     * not set, or if the BDS still has copy_on_read enabled */
+    read_only = !(reopen_state->flags & BDRV_O_RDWR);
+    ret = bdrv_can_set_read_only(reopen_state->bs, read_only, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
         goto error;
     }
 
@@ -2787,6 +2865,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
         do {
             QString *new_obj = qobject_to_qstring(entry->value);
             const char *new = qstring_get_str(new_obj);
+            /*
+             * Caution: while qdict_get_try_str() is fine, getting
+             * non-string types would require more care.  When
+             * bs->options come from -blockdev or blockdev_add, its
+             * members are typed according to the QAPI schema, but
+             * when they come from -drive, they're all QString.
+             */
             const char *old = qdict_get_try_str(reopen_state->bs->options,
                                                 entry->key);
 
@@ -3228,7 +3313,11 @@ int bdrv_truncate(BdrvChild *child, int64_t offset)
     BlockDriver *drv = bs->drv;
     int ret;
 
-    assert(child->perm & BLK_PERM_RESIZE);
+    /* FIXME: Some format block drivers use this function instead of implicitly
+     *        growing their file by writing beyond its end.
+     *        See bdrv_aligned_pwritev() for an explanation why we currently
+     *        cannot assert this permission in that case. */
+    // assert(child->perm & BLK_PERM_RESIZE);
 
     if (!drv)
         return -ENOMEDIUM;
@@ -3305,11 +3394,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
     *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-bool bdrv_is_read_only(BlockDriverState *bs)
-{
-    return bs->read_only;
-}
-
 bool bdrv_is_sg(BlockDriverState *bs)
 {
     return bs->sg;
@@ -4111,8 +4195,8 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
 
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
-                     char *options, uint64_t img_size, int flags,
-                     Error **errp, bool quiet)
+                     char *options, uint64_t img_size, int flags, bool quiet,
+                     Error **errp)
 {
     QemuOptsList *create_opts = NULL;
     QemuOpts *opts = NULL;
@@ -4278,6 +4362,11 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
     return bs->aio_context;
 }
 
+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
+{
+    aio_co_enter(bdrv_get_aio_context(bs), co);
+}
+
 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
 {
     QLIST_REMOVE(ban, list);
@@ -4350,11 +4439,12 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
 {
-    AioContext *ctx;
+    AioContext *ctx = bdrv_get_aio_context(bs);
 
+    aio_disable_external(ctx);
+    bdrv_parent_drained_begin(bs);
     bdrv_drain(bs); /* ensure there are no in-flight requests */
 
-    ctx = bdrv_get_aio_context(bs);
     while (aio_poll(ctx, false)) {
         /* wait for all bottom halves to execute */
     }
@@ -4366,6 +4456,8 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
      */
     aio_context_acquire(new_context);
     bdrv_attach_aio_context(bs, new_context);
+    bdrv_parent_drained_end(bs);
+    aio_enable_external(ctx);
     aio_context_release(new_context);
 }
 
diff --git a/block/Makefile.objs b/block/Makefile.objs
index de96f8ee80..ea955302c8 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,6 +19,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_VXHS) += vxhs.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
@@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
+vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o
diff --git a/block/block-backend.c b/block/block-backend.c
index 5742c09c2c..7405024e08 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -61,10 +61,13 @@ struct BlockBackend {
 
     uint64_t perm;
     uint64_t shared_perm;
+    bool disable_perm;
 
     bool allow_write_beyond_eof;
 
     NotifierList remove_bs_notifiers, insert_bs_notifiers;
+
+    int quiesce_counter;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -228,6 +231,9 @@ static void blk_delete(BlockBackend *blk)
     assert(!blk->refcnt);
     assert(!blk->name);
     assert(!blk->dev);
+    if (blk->public.throttle_state) {
+        blk_io_limits_disable(blk);
+    }
     if (blk->root) {
         blk_remove_bs(blk);
     }
@@ -576,7 +582,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
 {
     int ret;
 
-    if (blk->root) {
+    if (blk->root && !blk->disable_perm) {
         ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
         if (ret < 0) {
             return ret;
@@ -595,15 +601,52 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
     *shared_perm = blk->shared_perm;
 }
 
+/*
+ * Notifies the user of all BlockBackends that migration has completed. qdev
+ * devices can tighten their permissions in response (specifically revoke
+ * shared write permissions that we needed for storage migration).
+ *
+ * If an error is returned, the VM cannot be allowed to be resumed.
+ */
+void blk_resume_after_migration(Error **errp)
+{
+    BlockBackend *blk;
+    Error *local_err = NULL;
+
+    for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+        if (!blk->disable_perm) {
+            continue;
+        }
+
+        blk->disable_perm = false;
+
+        blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            blk->disable_perm = true;
+            return;
+        }
+    }
+}
+
 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
 {
     if (blk->dev) {
         return -EBUSY;
     }
+
+    /* While migration is still incoming, we don't need to apply the
+     * permissions of guest device BlockBackends. We might still have a block
+     * job or NBD server writing to the image for storage migration. */
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        blk->disable_perm = true;
+    }
+
     blk_ref(blk);
     blk->dev = dev;
     blk->legacy_dev = false;
     blk_iostatus_reset(blk);
+
     return 0;
 }
 
@@ -699,12 +742,17 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
                      void *opaque)
 {
     /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
-     * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
-     * is set. */
+     * it that way, so we can assume blk->dev, if present, is a DeviceState if
+     * blk->dev_ops is set. Non-device users may use dev_ops without device. */
     assert(!blk->legacy_dev);
 
     blk->dev_ops = ops;
     blk->dev_opaque = opaque;
+
+    /* Are we currently quiesced? Should we enforce this right now? */
+    if (blk->quiesce_counter && ops->drained_begin) {
+        ops->drained_begin(opaque);
+    }
 }
 
 /*
@@ -1000,7 +1048,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
         co_entry(&rwco);
     } else {
         Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(blk_bs(blk), co);
         BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
     }
 
@@ -1107,7 +1155,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->has_returned = false;
 
     co = qemu_coroutine_create(co_entry, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(blk_bs(blk), co);
 
     acb->has_returned = true;
     if (acb->rwco.ret != NOT_DONE) {
@@ -1870,6 +1918,12 @@ static void blk_root_drained_begin(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
 
+    if (++blk->quiesce_counter == 1) {
+        if (blk->dev_ops && blk->dev_ops->drained_begin) {
+            blk->dev_ops->drained_begin(blk->dev_opaque);
+        }
+    }
+
     /* Note that blk->root may not be accessible here yet if we are just
      * attaching to a BlockDriverState that is drained. Use child instead. */
 
@@ -1881,7 +1935,14 @@ static void blk_root_drained_begin(BdrvChild *child)
 static void blk_root_drained_end(BdrvChild *child)
 {
     BlockBackend *blk = child->opaque;
+    assert(blk->quiesce_counter);
 
     assert(blk->public.io_limits_disabled);
     --blk->public.io_limits_disabled;
+
+    if (--blk->quiesce_counter == 0) {
+        if (blk->dev_ops && blk->dev_ops->drained_end) {
+            blk->dev_ops->drained_end(blk->dev_opaque);
+        }
+    }
 }
diff --git a/block/bochs.c b/block/bochs.c
index 516da56c3b..a759b6eff0 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -110,7 +110,10 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    bs->read_only = true; /* no write support yet */
+    ret = bdrv_set_read_only(bs, true, errp); /* no write support yet */
+    if (ret < 0) {
+        return ret;
+    }
 
     ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
     if (ret < 0) {
diff --git a/block/cloop.c b/block/cloop.c
index a6c7b9dbe6..d6597fcf78 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -72,7 +72,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
-    bs->read_only = true;
+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }
 
     /* read header */
     ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
diff --git a/block/commit.c b/block/commit.c
index 28324820a4..91d2c344f6 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -335,6 +335,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     if (commit_top_bs == NULL) {
         goto fail;
     }
+    commit_top_bs->total_sectors = top->total_sectors;
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top));
 
     bdrv_set_backing_hd(commit_top_bs, top, &local_err);
     if (local_err) {
@@ -482,6 +484,7 @@ int bdrv_commit(BlockDriverState *bs)
         error_report_err(local_err);
         goto ro_cleanup;
     }
+    bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs));
 
     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
diff --git a/block/crypto.c b/block/crypto.c
index 4a2038888d..34549b28a5 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -56,11 +56,11 @@ static int block_crypto_probe_generic(QCryptoBlockFormat format,
 
 
 static ssize_t block_crypto_read_func(QCryptoBlock *block,
+                                      void *opaque,
                                       size_t offset,
                                       uint8_t *buf,
                                       size_t buflen,
-                                      Error **errp,
-                                      void *opaque)
+                                      Error **errp)
 {
     BlockDriverState *bs = opaque;
     ssize_t ret;
@@ -83,11 +83,11 @@ struct BlockCryptoCreateData {
 
 
 static ssize_t block_crypto_write_func(QCryptoBlock *block,
+                                       void *opaque,
                                        size_t offset,
                                        const uint8_t *buf,
                                        size_t buflen,
-                                       Error **errp,
-                                       void *opaque)
+                                       Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     ssize_t ret;
@@ -102,9 +102,9 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 
 
 static ssize_t block_crypto_init_func(QCryptoBlock *block,
+                                      void *opaque,
                                       size_t headerlen,
-                                      Error **errp,
-                                      void *opaque)
+                                      Error **errp)
 {
     struct BlockCryptoCreateData *data = opaque;
     int ret;
diff --git a/block/curl.c b/block/curl.c
index 34dbd335f4..2708d57c2f 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
     const char *cookie;
     double d;
     const char *secretid;
+    const char *protocol_delimiter;
 
     static int inited = 0;
 
@@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
         goto out_noclean;
     }
 
+    if (!strstart(file, bs->drv->protocol_name, &protocol_delimiter) ||
+        !strstart(protocol_delimiter, "://", NULL))
+    {
+        error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not "
+                   "start with '%s://')", bs->drv->protocol_name, file,
+                   bs->drv->protocol_name);
+        goto out_noclean;
+    }
+
     s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
     secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
 
diff --git a/block/dmg.c b/block/dmg.c
index a7d25fc47b..900ae5a678 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -419,8 +419,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
         return -EINVAL;
     }
 
+    ret = bdrv_set_read_only(bs, true, errp);
+    if (ret < 0) {
+        return ret;
+    }
+
     block_module_load_one("dmg-bz2");
-    bs->read_only = true;
 
     s->n_chunks = 0;
     s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
diff --git a/block/file-posix.c b/block/file-posix.c
index 53febd3767..0c4896876e 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -144,6 +144,7 @@ typedef struct BDRVRawState {
     bool has_write_zeroes:1;
     bool discard_zeroes:1;
     bool use_linux_aio:1;
+    bool page_cache_inconsistent:1;
     bool has_fallocate;
     bool needs_alignment;
 } BDRVRawState;
@@ -219,28 +220,28 @@ static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
 {
     unsigned int sector_size;
     bool success = false;
+    int i;
 
     errno = ENOTSUP;
-
-    /* Try a few ioctls to get the right size */
+    static const unsigned long ioctl_list[] = {
 #ifdef BLKSSZGET
-    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        BLKSSZGET,
 #endif
 #ifdef DKIOCGETBLOCKSIZE
-    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        DKIOCGETBLOCKSIZE,
 #endif
 #ifdef DIOCGSECTORSIZE
-    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
+        DIOCGSECTORSIZE,
 #endif
+    };
+
+    /* Try a few ioctls to get the right size */
+    for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) {
+        if (ioctl(fd, ioctl_list[i], &sector_size) >= 0) {
+            *sector_size_p = sector_size;
+            success = true;
+        }
+    }
 
     return success ? 0 : -errno;
 }
@@ -824,10 +825,31 @@ static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
 
 static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
 {
+    BDRVRawState *s = aiocb->bs->opaque;
     int ret;
 
+    if (s->page_cache_inconsistent) {
+        return -EIO;
+    }
+
     ret = qemu_fdatasync(aiocb->aio_fildes);
     if (ret == -1) {
+        /* There is no clear definition of the semantics of a failing fsync(),
+         * so we may have to assume the worst. The sad truth is that this
+         * assumption is correct for Linux. Some pages are now probably marked
+         * clean in the page cache even though they are inconsistent with the
+         * on-disk contents. The next fdatasync() call would succeed, but no
+         * further writeback attempt will be made. We can't get back to a state
+         * in which we know what is on disk (we would have to rewrite
+         * everything that was touched since the last fdatasync() at least), so
+         * make bdrv_flush() fail permanently. Given that the behaviour isn't
+         * really defined, I have little hope that other OSes are doing better.
+         *
+         * Obviously, this doesn't affect O_DIRECT, which bypasses the page
+         * cache. */
+        if ((s->open_flags & O_DIRECT) == 0) {
+            s->page_cache_inconsistent = true;
+        }
         return -errno;
     }
     return 0;
@@ -2171,6 +2193,12 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
     int ret;
 
 #if defined(__APPLE__) && defined(__MACH__)
+    /*
+     * Caution: while qdict_get_str() is fine, getting non-string types
+     * would require more care.  When @options come from -blockdev or
+     * blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
     const char *filename = qdict_get_str(options, "filename");
     char bsd_path[MAXPATHLEN] = "";
     bool error_occurred = false;
diff --git a/block/gluster.c b/block/gluster.c
index a577daef10..cf29b5f9a4 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -412,10 +412,12 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
     glfs_set_preopened(gconf->volume, glfs);
 
     for (server = gconf->server; server; server = server->next) {
-        if (server->value->type  == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        switch (server->value->type) {
+        case SOCKET_ADDRESS_FLAT_TYPE_UNIX:
             ret = glfs_set_volfile_server(glfs, "unix",
                                    server->value->u.q_unix.path, 0);
-        } else {
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_INET:
             if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
                 port > 65535) {
                 error_setg(errp, "'%s' is not a valid port number",
@@ -426,6 +428,11 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
             ret = glfs_set_volfile_server(glfs, "tcp",
                                    server->value->u.inet.host,
                                    (int)port);
+            break;
+        case SOCKET_ADDRESS_FLAT_TYPE_VSOCK:
+        case SOCKET_ADDRESS_FLAT_TYPE_FD:
+        default:
+            abort();
         }
 
         if (ret < 0) {
@@ -487,7 +494,7 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
     char *str = NULL;
     const char *ptr;
     size_t num_servers;
-    int i;
+    int i, type;
 
     /* create opts info from runtime_json_opts list */
     opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
@@ -539,16 +546,17 @@ static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
         if (!strcmp(ptr, "tcp")) {
             ptr = "inet";       /* accept legacy "tcp" */
         }
-        gsconf->type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
-                                       SOCKET_ADDRESS_FLAT_TYPE__MAX, -1,
-                                       &local_err);
-        if (local_err) {
-            error_append_hint(&local_err,
-                              "Parameter '%s' may be 'inet' or 'unix'\n",
-                              GLUSTER_OPT_TYPE);
+        type = qapi_enum_parse(SocketAddressFlatType_lookup, ptr,
+                               SOCKET_ADDRESS_FLAT_TYPE__MAX, -1, NULL);
+        if (type != SOCKET_ADDRESS_FLAT_TYPE_INET
+            && type != SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+            error_setg(&local_err,
+                       "Parameter '%s' may be 'inet' or 'unix'",
+                       GLUSTER_OPT_TYPE);
             error_append_hint(&local_err, GERR_INDEX_HINT, i);
             goto out;
         }
+        gsconf->type = type;
         qemu_opts_del(opts);
 
         if (gsconf->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
diff --git a/block/io.c b/block/io.c
index 2709a7007f..a7142e00e8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -44,7 +44,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int count, BdrvRequestFlags flags);
 
-static void bdrv_parent_drained_begin(BlockDriverState *bs)
+void bdrv_parent_drained_begin(BlockDriverState *bs)
 {
     BdrvChild *c;
 
@@ -55,7 +55,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs)
     }
 }
 
-static void bdrv_parent_drained_end(BlockDriverState *bs)
+void bdrv_parent_drained_end(BlockDriverState *bs)
 {
     BdrvChild *c;
 
@@ -158,7 +158,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
-    BdrvChild *child;
+    BdrvChild *child, *tmp;
     bool waited;
 
     waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
@@ -167,8 +167,25 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
         bs->drv->bdrv_drain(bs);
     }
 
-    QLIST_FOREACH(child, &bs->children, next) {
-        waited |= bdrv_drain_recurse(child->bs);
+    QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+        BlockDriverState *bs = child->bs;
+        bool in_main_loop =
+            qemu_get_current_aio_context() == qemu_get_aio_context();
+        assert(bs->refcnt > 0);
+        if (in_main_loop) {
+            /* In case the recursive bdrv_drain_recurse processes a
+             * block_job_defer_to_main_loop BH and modifies the graph,
+             * let's hold a reference to bs until we are done.
+             *
+             * IOThread doesn't have such a BH, and it is not safe to call
+             * bdrv_unref without BQL, so skip doing it there.
+             */
+            bdrv_ref(bs);
+        }
+        waited |= bdrv_drain_recurse(bs);
+        if (in_main_loop) {
+            bdrv_unref(bs);
+        }
     }
 
     return waited;
@@ -616,7 +633,7 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
         bdrv_rw_co_entry(&rwco);
     } else {
         co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(child->bs, co);
         BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE);
     }
     return rwco.ret;
@@ -945,7 +962,14 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
     size_t skip_bytes;
     int ret;
 
-    assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
+    /* FIXME We cannot require callers to have write permissions when all they
+     * are doing is a read request. If we did things right, write permissions
+     * would be obtained anyway, but internally by the copy-on-read code. As
+     * long as it is implemented here rather than in a separat filter driver,
+     * the copy-on-read code doesn't have its own BdrvChild, however, for which
+     * it could request permissions. Therefore we have to bypass the permission
+     * system for the moment. */
+    // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
 
     /* Cover entire cluster so no additional backing file I/O is required when
      * allocating cluster in the image file.
@@ -1338,8 +1362,16 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
     assert(!waited || !req->serialising);
     assert(req->overlap_offset <= offset);
     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-    assert(child->perm & BLK_PERM_WRITE);
-    assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+    /* FIXME: Block migration uses the BlockBackend of the guest device at a
+     *        point when it has not yet taken write permissions. This will be
+     *        fixed by a future patch, but for now we have to bypass this
+     *        assertion for block migration to work. */
+    // assert(child->perm & BLK_PERM_WRITE);
+    /* FIXME: Because of the above, we also cannot guarantee that all format
+     *        BDS take the BLK_PERM_RESIZE permission on their file BDS, since
+     *        they are not obligated to do so if they do not have any parent
+     *        that has taken the permission to write to them. */
+    // assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
@@ -1873,7 +1905,7 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
     } else {
         co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
                                    &data);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
         BDRV_POLL_WHILE(bs, !data.done);
     }
     return data.ret;
@@ -1999,7 +2031,7 @@ bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
         };
         Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
 
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
         while (data.ret == -EINPROGRESS) {
             aio_poll(bdrv_get_aio_context(bs), true);
         }
@@ -2216,7 +2248,7 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
     acb->is_write = is_write;
 
     co = qemu_coroutine_create(bdrv_co_do_rw, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(child->bs, co);
 
     bdrv_co_maybe_schedule_bh(acb);
     return &acb->common;
@@ -2247,7 +2279,7 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     acb->req.error = -EINPROGRESS;
 
     co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(bs, co);
 
     bdrv_co_maybe_schedule_bh(acb);
     return &acb->common;
@@ -2271,16 +2303,17 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 {
-    int ret;
+    int current_gen;
+    int ret = 0;
+
+    bdrv_inc_in_flight(bs);
 
     if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
         bdrv_is_sg(bs)) {
-        return 0;
+        goto early_exit;
     }
 
-    bdrv_inc_in_flight(bs);
-
-    int current_gen = bs->write_gen;
+    current_gen = bs->write_gen;
 
     /* Wait until any previous flushes are completed */
     while (bs->active_flush_req) {
@@ -2363,6 +2396,7 @@ out:
     /* Return value is ignored - it's ok if wait queue is empty */
     qemu_co_queue_next(&bs->flush_queue);
 
+early_exit:
     bdrv_dec_in_flight(bs);
     return ret;
 }
@@ -2380,7 +2414,7 @@ int bdrv_flush(BlockDriverState *bs)
         bdrv_flush_co_entry(&flush_co);
     } else {
         co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
         BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE);
     }
 
@@ -2527,7 +2561,7 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
         bdrv_pdiscard_co_entry(&rwco);
     } else {
         co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
-        qemu_coroutine_enter(co);
+        bdrv_coroutine_enter(bs, co);
         BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE);
     }
 
diff --git a/block/iscsi.c b/block/iscsi.c
index 75d890538e..42fb0b019c 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -103,7 +103,6 @@ typedef struct IscsiTask {
 
 typedef struct IscsiAIOCB {
     BlockAIOCB common;
-    QEMUIOVector *qiov;
     QEMUBH *bh;
     IscsiLun *iscsilun;
     struct scsi_task *task;
@@ -2093,6 +2092,7 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
     BlockDriverState *bs;
     IscsiLun *iscsilun = NULL;
     QDict *bs_options;
+    Error *local_err = NULL;
 
     bs = bdrv_new();
 
@@ -2103,8 +2103,13 @@ static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
     iscsilun = bs->opaque;
 
     bs_options = qdict_new();
-    qdict_put(bs_options, "filename", qstring_from_str(filename));
-    ret = iscsi_open(bs, bs_options, 0, NULL);
+    iscsi_parse_filename(filename, bs_options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+    } else {
+        ret = iscsi_open(bs, bs_options, 0, NULL);
+    }
     QDECREF(bs_options);
 
     if (ret != 0) {
diff --git a/block/mirror.c b/block/mirror.c
index ca4baa510a..9f5eb692fd 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -634,7 +634,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             }
 
             if (s->in_flight >= MAX_IN_FLIGHT) {
-                trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+                trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
+                                   s->in_flight);
                 mirror_wait_for_io(s);
                 continue;
             }
@@ -809,7 +810,7 @@ static void coroutine_fn mirror_run(void *opaque)
             s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
+                trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
                 mirror_wait_for_io(s);
                 continue;
             } else if (cnt != 0) {
@@ -1111,10 +1112,11 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                              BlockdevOnError on_target_error,
                              bool unmap,
                              BlockCompletionFunc *cb,
-                             void *opaque, Error **errp,
+                             void *opaque,
                              const BlockJobDriver *driver,
                              bool is_none_mode, BlockDriverState *base,
-                             bool auto_complete, const char *filter_node_name)
+                             bool auto_complete, const char *filter_node_name,
+                             Error **errp)
 {
     MirrorBlockJob *s;
     BlockDriverState *mirror_top_bs;
@@ -1147,9 +1149,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
         return;
     }
     mirror_top_bs->total_sectors = bs->total_sectors;
+    bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
 
     /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
-     * it alive until block_job_create() even if bs has no parent. */
+     * it alive until block_job_create() succeeds even if bs has no parent. */
     bdrv_ref(mirror_top_bs);
     bdrv_drained_begin(bs);
     bdrv_append(mirror_top_bs, bs, &local_err);
@@ -1167,10 +1170,12 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
                          BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                          BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
                          creation_flags, cb, opaque, errp);
-    bdrv_unref(mirror_top_bs);
     if (!s) {
         goto fail;
     }
+    /* The block job now has a reference to this node */
+    bdrv_unref(mirror_top_bs);
+
     s->source = bs;
     s->mirror_top_bs = mirror_top_bs;
 
@@ -1241,6 +1246,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
 
 fail:
     if (s) {
+        /* Make sure this BDS does not go away until we have completed the graph
+         * changes below */
+        bdrv_ref(mirror_top_bs);
+
         g_free(s->replaces);
         blk_unref(s->target);
         block_job_unref(&s->common);
@@ -1249,6 +1258,8 @@ fail:
     bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
                             &error_abort);
     bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
+
+    bdrv_unref(mirror_top_bs);
 }
 
 void mirror_start(const char *job_id, BlockDriverState *bs,
@@ -1270,17 +1281,17 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
     base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
     mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces,
                      speed, granularity, buf_size, backing_mode,
-                     on_source_error, on_target_error, unmap, NULL, NULL, errp,
+                     on_source_error, on_target_error, unmap, NULL, NULL,
                      &mirror_job_driver, is_none_mode, base, false,
-                     filter_node_name);
+                     filter_node_name, errp);
 }
 
 void commit_active_start(const char *job_id, BlockDriverState *bs,
                          BlockDriverState *base, int creation_flags,
                          int64_t speed, BlockdevOnError on_error,
                          const char *filter_node_name,
-                         BlockCompletionFunc *cb, void *opaque, Error **errp,
-                         bool auto_complete)
+                         BlockCompletionFunc *cb, void *opaque,
+                         bool auto_complete, Error **errp)
 {
     int orig_base_flags;
     Error *local_err = NULL;
@@ -1293,9 +1304,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs,
 
     mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0,
                      MIRROR_LEAVE_BACKING_CHAIN,
-                     on_error, on_error, true, cb, opaque, &local_err,
+                     on_error, on_error, true, cb, opaque,
                      &commit_active_job_driver, false, base, auto_complete,
-                     filter_node_name);
+                     filter_node_name, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error_restore_flags;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 0dc12c2d67..1e2952fdae 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -33,17 +33,15 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
-static void nbd_recv_coroutines_enter_all(BlockDriverState *bs)
+static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
 {
-    NBDClientSession *s = nbd_get_client_session(bs);
     int i;
 
     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
         if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i]);
+            aio_co_wake(s->recv_coroutine[i]);
         }
     }
-    BDRV_POLL_WHILE(bs, s->read_reply_co);
 }
 
 static void nbd_teardown_connection(BlockDriverState *bs)
@@ -58,7 +56,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
     qio_channel_shutdown(client->ioc,
                          QIO_CHANNEL_SHUTDOWN_BOTH,
                          NULL);
-    nbd_recv_coroutines_enter_all(bs);
+    BDRV_POLL_WHILE(bs, client->read_reply_co);
 
     nbd_client_detach_aio_context(bs);
     object_unref(OBJECT(client->sioc));
@@ -76,7 +74,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
     for (;;) {
         assert(s->reply.handle == 0);
         ret = nbd_receive_reply(s->ioc, &s->reply);
-        if (ret < 0) {
+        if (ret <= 0) {
             break;
         }
 
@@ -103,6 +101,8 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
         aio_co_wake(s->recv_coroutine[i]);
         qemu_coroutine_yield();
     }
+
+    nbd_recv_coroutines_enter_all(s);
     s->read_reply_co = NULL;
 }
 
diff --git a/block/nbd-client.h b/block/nbd-client.h
index 8cdfc92e94..891ba44a20 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -30,8 +30,6 @@ typedef struct NBDClientSession {
 
     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
     NBDReply reply;
-
-    bool is_unix;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd.c b/block/nbd.c
index f478f80b4a..814ab26dce 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -47,7 +47,7 @@ typedef struct BDRVNBDState {
     NBDClientSession client;
 
     /* For nbd_refresh_filename() */
-    SocketAddress *saddr;
+    SocketAddressFlat *saddr;
     char *export, *tlscredsid;
 } BDRVNBDState;
 
@@ -95,7 +95,7 @@ static int nbd_parse_uri(const char *filename, QDict *options)
             goto out;
         }
         qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path",
+        qdict_put(options, "server.path",
                   qstring_from_str(qp->p[0].value));
     } else {
         QString *host;
@@ -116,10 +116,10 @@ static int nbd_parse_uri(const char *filename, QDict *options)
         }
 
         qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", host);
+        qdict_put(options, "server.host", host);
 
         port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT);
-        qdict_put(options, "server.data.port", qstring_from_str(port_str));
+        qdict_put(options, "server.port", qstring_from_str(port_str));
         g_free(port_str);
     }
 
@@ -197,7 +197,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
     /* are we a UNIX or TCP socket? */
     if (strstart(host_spec, "unix:", &unixpath)) {
         qdict_put(options, "server.type", qstring_from_str("unix"));
-        qdict_put(options, "server.data.path", qstring_from_str(unixpath));
+        qdict_put(options, "server.path", qstring_from_str(unixpath));
     } else {
         InetSocketAddress *addr = NULL;
 
@@ -207,8 +207,8 @@ static void nbd_parse_filename(const char *filename, QDict *options,
         }
 
         qdict_put(options, "server.type", qstring_from_str("inet"));
-        qdict_put(options, "server.data.host", qstring_from_str(addr->host));
-        qdict_put(options, "server.data.port", qstring_from_str(addr->port));
+        qdict_put(options, "server.host", qstring_from_str(addr->host));
+        qdict_put(options, "server.port", qstring_from_str(addr->port));
         qapi_free_InetSocketAddress(addr);
     }
 
@@ -248,20 +248,21 @@ static bool nbd_process_legacy_socket_options(QDict *output_options,
         }
 
         qdict_put(output_options, "server.type", qstring_from_str("unix"));
-        qdict_put(output_options, "server.data.path", qstring_from_str(path));
+        qdict_put(output_options, "server.path", qstring_from_str(path));
     } else if (host) {
         qdict_put(output_options, "server.type", qstring_from_str("inet"));
-        qdict_put(output_options, "server.data.host", qstring_from_str(host));
-        qdict_put(output_options, "server.data.port",
+        qdict_put(output_options, "server.host", qstring_from_str(host));
+        qdict_put(output_options, "server.port",
                   qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT)));
     }
 
     return true;
 }
 
-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
+static SocketAddressFlat *nbd_config(BDRVNBDState *s, QDict *options,
+                                     Error **errp)
 {
-    SocketAddress *saddr = NULL;
+    SocketAddressFlat *saddr = NULL;
     QDict *addr = NULL;
     QObject *crumpled_addr = NULL;
     Visitor *iv = NULL;
@@ -278,15 +279,21 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp)
         goto done;
     }
 
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
+     * server.type=inet.  .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
     iv = qobject_input_visitor_new(crumpled_addr);
-    visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
+    visit_type_SocketAddressFlat(iv, NULL, &saddr, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto done;
     }
 
-    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
-
 done:
     QDECREF(addr);
     qobject_decref(crumpled_addr);
@@ -300,9 +307,10 @@ NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
     return &s->client;
 }
 
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+static QIOChannelSocket *nbd_establish_connection(SocketAddressFlat *saddr_flat,
                                                   Error **errp)
 {
+    SocketAddress *saddr = socket_address_crumple(saddr_flat);
     QIOChannelSocket *sioc;
     Error *local_err = NULL;
 
@@ -312,7 +320,9 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
     qio_channel_socket_connect_sync(sioc,
                                     saddr,
                                     &local_err);
+    qapi_free_SocketAddress(saddr);
     if (local_err) {
+        object_unref(OBJECT(sioc));
         error_propagate(errp, local_err);
         return NULL;
     }
@@ -403,7 +413,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
         goto error;
     }
 
-    /* Translate @host, @port, and @path to a SocketAddress */
+    /* Translate @host, @port, and @path to a SocketAddressFlat */
     if (!nbd_process_legacy_socket_options(options, opts, errp)) {
         goto error;
     }
@@ -423,11 +433,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
             goto error;
         }
 
-        if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) {
+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
+        if (s->saddr->type != SOCKET_ADDRESS_FLAT_TYPE_INET) {
             error_setg(errp, "TLS only supported over IP sockets");
             goto error;
         }
-        hostname = s->saddr->u.inet.data->host;
+        hostname = s->saddr->u.inet.host;
     }
 
     /* establish TCP connection, return error if it fails
@@ -450,7 +461,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
         object_unref(OBJECT(tlscreds));
     }
     if (ret < 0) {
-        qapi_free_SocketAddress(s->saddr);
+        qapi_free_SocketAddressFlat(s->saddr);
         g_free(s->export);
         g_free(s->tlscredsid);
     }
@@ -476,7 +487,7 @@ static void nbd_close(BlockDriverState *bs)
 
     nbd_client_close(bs);
 
-    qapi_free_SocketAddress(s->saddr);
+    qapi_free_SocketAddressFlat(s->saddr);
     g_free(s->export);
     g_free(s->tlscredsid);
 }
@@ -507,15 +518,15 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
     Visitor *ov;
     const char *host = NULL, *port = NULL, *path = NULL;
 
-    if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) {
-        const InetSocketAddress *inet = s->saddr->u.inet.data;
+    if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_INET) {
+        const InetSocketAddress *inet = &s->saddr->u.inet;
         if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) {
             host = inet->host;
             port = inet->port;
         }
-    } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) {
-        path = s->saddr->u.q_unix.data->path;
-    }
+    } else if (s->saddr->type == SOCKET_ADDRESS_FLAT_TYPE_UNIX) {
+        path = s->saddr->u.q_unix.path;
+    } /* else can't represent as pseudo-filename */
 
     qdict_put(opts, "driver", qstring_from_str("nbd"));
 
@@ -534,7 +545,7 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
     }
 
     ov = qobject_output_visitor_new(&saddr_qdict);
-    visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort);
+    visit_type_SocketAddressFlat(ov, NULL, &s->saddr, &error_abort);
     visit_complete(ov, &saddr_qdict);
     visit_free(ov);
     qdict_put_obj(opts, "server", saddr_qdict);
diff --git a/block/nfs.c b/block/nfs.c
index 3f43f6e26a..6541dec1fc 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -474,6 +474,13 @@ static NFSServer *nfs_config(QDict *options, Error **errp)
         goto out;
     }
 
+    /*
+     * Caution: this works only because all scalar members of
+     * NFSServer are QString in @crumpled_addr.  The visitor expects
+     * @crumpled_addr to be typed according to the QAPI schema.  It
+     * is when @options come from -blockdev or blockdev_add.  But when
+     * they come from -drive, they're all QString.
+     */
     iv = qobject_input_visitor_new(crumpled_addr);
     visit_type_NFSServer(iv, NULL, &server, &local_error);
     if (local_error) {
@@ -490,7 +497,7 @@ out:
 
 
 static int64_t nfs_client_open(NFSClient *client, QDict *options,
-                               int flags, Error **errp, int open_flags)
+                               int flags, int open_flags, Error **errp)
 {
     int ret = -EINVAL;
     QemuOpts *opts = NULL;
@@ -656,7 +663,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
 
     ret = nfs_client_open(client, options,
                           (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp, bs->open_flags);
+                          bs->open_flags, errp);
     if (ret < 0) {
         return ret;
     }
@@ -698,7 +705,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
         goto out;
     }
 
-    ret = nfs_client_open(client, options, O_CREAT, errp, 0);
+    ret = nfs_client_open(client, options, O_CREAT, 0, errp);
     if (ret < 0) {
         goto out;
     }
diff --git a/block/parallels.c b/block/parallels.c
index 19935e29a9..90acf79687 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -114,7 +114,7 @@ static QemuOptsList parallels_runtime_opts = {
             .name = PARALLELS_OPT_PREALLOC_SIZE,
             .type = QEMU_OPT_SIZE,
             .help = "Preallocation size on image expansion",
-            .def_value_str = "128MiB",
+            .def_value_str = "128M",
         },
         {
             .name = PARALLELS_OPT_PREALLOC_MODE,
@@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
                                  int nb_sectors, int *pnum)
 {
     BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
+    int64_t pos, space, idx, to_allocate, i;
 
     pos = block_status(s, sector_num, nb_sectors, pnum);
     if (pos > 0) {
@@ -201,11 +200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
     }
 
     idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
     to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
+
+    /* This function is called only by parallels_co_writev(), which will never
+     * pass a sector_num at or beyond the end of the image (because the block
+     * layer never passes such a sector_num to that function). Therefore, idx
+     * is always below s->bat_size.
+     * block_status() will limit *pnum so that sector_num + *pnum will not
+     * exceed the image end. Therefore, idx + to_allocate cannot exceed
+     * s->bat_size.
+     * Note that s->bat_size is an unsigned int, therefore idx + to_allocate
+     * will always fit into a uint32_t. */
+    assert(idx < s->bat_size && idx + to_allocate <= s->bat_size);
+
     space = to_allocate * s->tracks;
     if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
         int ret;
@@ -687,7 +694,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
     if (local_err != NULL) {
         goto fail_options;
     }
-    if (!bdrv_has_zero_init(bs->file->bs) ||
+
+    if (!(flags & BDRV_O_RESIZE) || !bdrv_has_zero_init(bs->file->bs) ||
             bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) {
         s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
     }
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 78c11d4948..100398c565 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1519,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
 
     end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
 
-    /* Round start up and end down */
-    offset = align_offset(offset, s->cluster_size);
-    end_offset = start_of_cluster(s, end_offset);
-
-    if (offset > end_offset) {
-        return 0;
+    /* The caller must cluster-align start; round end down except at EOF */
+    assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+    if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
+        end_offset = start_of_cluster(s, end_offset);
     }
 
     nb_clusters = size_to_clusters(s, end_offset - offset);
diff --git a/block/rbd.c b/block/rbd.c
index ee13f3d9d3..6471f4fd2b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -13,14 +13,14 @@
 
 #include "qemu/osdep.h"
 
+#include <rbd/librbd.h>
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
 #include "crypto/secret.h"
 #include "qemu/cutils.h"
 #include "qapi/qmp/qstring.h"
-
-#include <rbd/librbd.h>
+#include "qapi/qmp/qjson.h"
 
 /*
  * When specifying the image filename use:
@@ -56,11 +56,6 @@
 
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
-#define RBD_MAX_CONF_NAME_SIZE 128
-#define RBD_MAX_CONF_VAL_SIZE 512
-#define RBD_MAX_CONF_SIZE 1024
-#define RBD_MAX_POOL_NAME_SIZE 128
-#define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100
 
 /* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
@@ -99,43 +94,28 @@ typedef struct BDRVRBDState {
     rados_t cluster;
     rados_ioctx_t io_ctx;
     rbd_image_t image;
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
+    char *image_name;
     char *snap;
 } BDRVRBDState;
 
-static char *qemu_rbd_next_tok(int max_len,
-                               char *src, char delim,
-                               const char *name,
-                               char **p, Error **errp)
+static char *qemu_rbd_next_tok(char *src, char delim, char **p)
 {
-    int l;
     char *end;
 
     *p = NULL;
 
-    if (delim != '\0') {
-        for (end = src; *end; ++end) {
-            if (*end == delim) {
-                break;
-            }
-            if (*end == '\\' && end[1] != '\0') {
-                end++;
-            }
-        }
+    for (end = src; *end; ++end) {
         if (*end == delim) {
-            *p = end + 1;
-            *end = '\0';
+            break;
+        }
+        if (*end == '\\' && end[1] != '\0') {
+            end++;
         }
     }
-    l = strlen(src);
-    if (l >= max_len) {
-        error_setg(errp, "%s too long", name);
-        return NULL;
-    } else if (l == 0) {
-        error_setg(errp, "%s too short", name);
-        return NULL;
+    if (*end == delim) {
+        *p = end + 1;
+        *end = '\0';
     }
-
     return src;
 }
 
@@ -156,26 +136,19 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
                                     Error **errp)
 {
     const char *start;
-    char *p, *buf, *keypairs;
+    char *p, *buf;
+    QList *keypairs = NULL;
     char *found_str;
-    size_t max_keypair_size;
-    Error *local_err = NULL;
 
     if (!strstart(filename, "rbd:", &start)) {
         error_setg(errp, "File name must start with 'rbd:'");
         return;
     }
 
-    max_keypair_size = strlen(start) + 1;
     buf = g_strdup(start);
-    keypairs = g_malloc0(max_keypair_size);
     p = buf;
 
-    found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p,
-                                  '/', "pool name", &p, &local_err);
-    if (local_err) {
-        goto done;
-    }
+    found_str = qemu_rbd_next_tok(p, '/', &p);
     if (!p) {
         error_setg(errp, "Pool name is required");
         goto done;
@@ -184,27 +157,15 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
     qdict_put(options, "pool", qstring_from_str(found_str));
 
     if (strchr(p, '@')) {
-        found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
-                                      '@', "object name", &p, &local_err);
-        if (local_err) {
-            goto done;
-        }
+        found_str = qemu_rbd_next_tok(p, '@', &p);
         qemu_rbd_unescape(found_str);
         qdict_put(options, "image", qstring_from_str(found_str));
 
-        found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p,
-                                      ':', "snap name", &p, &local_err);
-        if (local_err) {
-            goto done;
-        }
+        found_str = qemu_rbd_next_tok(p, ':', &p);
         qemu_rbd_unescape(found_str);
         qdict_put(options, "snapshot", qstring_from_str(found_str));
     } else {
-        found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p,
-                                      ':', "object name", &p, &local_err);
-        if (local_err) {
-            goto done;
-        }
+        found_str = qemu_rbd_next_tok(p, ':', &p);
         qemu_rbd_unescape(found_str);
         qdict_put(options, "image", qstring_from_str(found_str));
     }
@@ -212,24 +173,11 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
         goto done;
     }
 
-    found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
-                                  '\0', "configuration", &p, &local_err);
-    if (local_err) {
-        goto done;
-    }
-
-    p = found_str;
-
     /* The following are essentially all key/value pairs, and we treat
      * 'id' and 'conf' a bit special.  Key/value pairs may be in any order. */
     while (p) {
         char *name, *value;
-        name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
-                                 '=', "conf option name", &p, &local_err);
-        if (local_err) {
-            break;
-        }
-
+        name = qemu_rbd_next_tok(p, '=', &p);
         if (!p) {
             error_setg(errp, "conf option %s has no value", name);
             break;
@@ -237,11 +185,7 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
 
         qemu_rbd_unescape(name);
 
-        value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
-                                  ':', "conf option value", &p, &local_err);
-        if (local_err) {
-            break;
-        }
+        value = qemu_rbd_next_tok(p, ':', &p);
         qemu_rbd_unescape(value);
 
         if (!strcmp(name, "conf")) {
@@ -249,36 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options,
         } else if (!strcmp(name, "id")) {
             qdict_put(options, "user" , qstring_from_str(value));
         } else {
-            /* FIXME: This is pretty ugly, and not the right way to do this.
-             *        These should be contained in a structure, and then
-             *        passed explicitly as individual key/value pairs to
-             *        rados.  Consider this legacy code that needs to be
-             *        updated. */
-            char *tmp = g_malloc0(max_keypair_size);
-            /* only use a delimiter if it is not the first keypair found */
-            /* These are sets of unknown key/value pairs we'll pass along
-             * to ceph */
-            if (keypairs[0]) {
-                snprintf(tmp, max_keypair_size, ":%s=%s", name, value);
-                pstrcat(keypairs, max_keypair_size, tmp);
-            } else {
-                snprintf(keypairs, max_keypair_size, "%s=%s", name, value);
+            /*
+             * We pass these internally to qemu_rbd_set_keypairs(), so
+             * we can get away with the simpler list of [ "key1",
+             * "value1", "key2", "value2" ] rather than a raw dict
+             * { "key1": "value1", "key2": "value2" } where we can't
+             * guarantee order, or even a more correct but complex
+             * [ { "key1": "value1" }, { "key2": "value2" } ]
+             */
+            if (!keypairs) {
+                keypairs = qlist_new();
             }
-            g_free(tmp);
+            qlist_append(keypairs, qstring_from_str(name));
+            qlist_append(keypairs, qstring_from_str(value));
         }
     }
 
-    if (keypairs[0]) {
-        qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs));
+    if (keypairs) {
+        qdict_put(options, "=keyvalue-pairs",
+                  qobject_to_json(QOBJECT(keypairs)));
     }
 
-
 done:
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
     g_free(buf);
-    g_free(keypairs);
+    QDECREF(keypairs);
     return;
 }
 
@@ -302,50 +240,41 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
     return 0;
 }
 
-static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs,
+static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
                                  Error **errp)
 {
-    char *p, *buf;
-    char *name;
-    char *value;
-    Error *local_err = NULL;
+    QList *keypairs;
+    QString *name;
+    QString *value;
+    const char *key;
+    size_t remaining;
     int ret = 0;
 
-    buf = g_strdup(keypairs);
-    p = buf;
-
-    while (p) {
-        name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p,
-                                 '=', "conf option name", &p, &local_err);
-        if (local_err) {
-            break;
-        }
-
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
-            ret = -EINVAL;
-            break;
-        }
+    if (!keypairs_json) {
+        return ret;
+    }
+    keypairs = qobject_to_qlist(qobject_from_json(keypairs_json,
+                                                  &error_abort));
+    remaining = qlist_size(keypairs) / 2;
+    assert(remaining);
 
-        value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p,
-                                  ':', "conf option value", &p, &local_err);
-        if (local_err) {
-            break;
-        }
+    while (remaining--) {
+        name = qobject_to_qstring(qlist_pop(keypairs));
+        value = qobject_to_qstring(qlist_pop(keypairs));
+        assert(name && value);
+        key = qstring_get_str(name);
 
-        ret = rados_conf_set(cluster, name, value);
+        ret = rados_conf_set(cluster, key, qstring_get_str(value));
+        QDECREF(name);
+        QDECREF(value);
         if (ret < 0) {
-            error_setg_errno(errp, -ret, "invalid conf option %s", name);
+            error_setg_errno(errp, -ret, "invalid conf option %s", key);
             ret = -EINVAL;
             break;
         }
     }
 
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-    }
-    g_free(buf);
+    QDECREF(keypairs);
     return ret;
 }
 
@@ -365,14 +294,14 @@ static QemuOptsList runtime_opts = {
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
     .desc = {
         {
-            .name = "filename",
+            .name = "pool",
             .type = QEMU_OPT_STRING,
-            .help = "Specification of the rbd image",
+            .help = "Rados pool name",
         },
         {
-            .name = "password-secret",
+            .name = "image",
             .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
+            .help = "Image name in the pool",
         },
         {
             .name = "conf",
@@ -380,16 +309,6 @@ static QemuOptsList runtime_opts = {
             .help = "Rados config file location",
         },
         {
-            .name = "pool",
-            .type = QEMU_OPT_STRING,
-            .help = "Rados pool name",
-        },
-        {
-            .name = "image",
-            .type = QEMU_OPT_STRING,
-            .help = "Image name in the pool",
-        },
-        {
             .name = "snapshot",
             .type = QEMU_OPT_STRING,
             .help = "Ceph snapshot name",
@@ -400,23 +319,26 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "Rados id name",
         },
+        /*
+         * server.* extracted manually, see qemu_rbd_mon_host()
+         */
         {
-            .name = "keyvalue-pairs",
-            .type = QEMU_OPT_STRING,
-            .help = "Legacy rados key/value option parameters",
-        },
-        {
-            .name = "host",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "port",
+            .name = "password-secret",
             .type = QEMU_OPT_STRING,
+            .help = "ID of secret providing the password",
         },
+
+        /*
+         * Keys for qemu_rbd_parse_filename(), not in the QAPI schema
+         */
         {
-            .name = "auth",
+            /*
+             * HACK: name starts with '=' so that qemu_opts_parse()
+             * can't set it
+             */
+            .name = "=keyvalue-pairs",
             .type = QEMU_OPT_STRING,
-            .help = "Supported authentication method, either cephx or none",
+            .help = "Legacy rados key/value option parameters",
         },
         { /* end of list */ }
     },
@@ -428,12 +350,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
     int64_t bytes = 0;
     int64_t objsize;
     int obj_order = 0;
-    const char *pool, *name, *conf, *clientname, *keypairs;
+    const char *pool, *image_name, *conf, *user, *keypairs;
     const char *secretid;
     rados_t cluster;
     rados_ioctx_t io_ctx;
     QDict *options = NULL;
-    QemuOpts *rbd_opts = NULL;
     int ret = 0;
 
     secretid = qemu_opt_get(opts, "password-secret");
@@ -464,21 +385,19 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
         goto exit;
     }
 
-    rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(rbd_opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    pool       = qemu_opt_get(rbd_opts, "pool");
-    conf       = qemu_opt_get(rbd_opts, "conf");
-    clientname = qemu_opt_get(rbd_opts, "user");
-    name       = qemu_opt_get(rbd_opts, "image");
-    keypairs   = qemu_opt_get(rbd_opts, "keyvalue-pairs");
+    /*
+     * Caution: while qdict_get_try_str() is fine, getting non-string
+     * types would require more care.  When @options come from -blockdev
+     * or blockdev_add, its members are typed according to the QAPI
+     * schema, but when they come from -drive, they're all QString.
+     */
+    pool       = qdict_get_try_str(options, "pool");
+    conf       = qdict_get_try_str(options, "conf");
+    user       = qdict_get_try_str(options, "user");
+    image_name = qdict_get_try_str(options, "image");
+    keypairs   = qdict_get_try_str(options, "=keyvalue-pairs");
 
-    ret = rados_create(&cluster, clientname);
+    ret = rados_create(&cluster, user);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "error initializing");
         goto exit;
@@ -515,7 +434,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
         goto shutdown;
     }
 
-    ret = rbd_create(io_ctx, name, bytes, &obj_order);
+    ret = rbd_create(io_ctx, image_name, bytes, &obj_order);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "error rbd create");
     }
@@ -527,7 +446,6 @@ shutdown:
 
 exit:
     QDECREF(options);
-    qemu_opts_del(rbd_opts);
     return ret;
 }
 
@@ -578,91 +496,43 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     qemu_aio_unref(acb);
 }
 
-#define RBD_MON_HOST          0
-#define RBD_AUTH_SUPPORTED    1
-
-static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type,
-                                 Error **errp)
+static char *qemu_rbd_mon_host(QDict *options, Error **errp)
 {
-    int num_entries;
-    QemuOpts *opts = NULL;
-    QDict *sub_options;
-    const char *host;
-    const char *port;
-    char *str;
-    char *rados_str = NULL;
-    Error *local_err = NULL;
+    const char **vals = g_new(const char *, qdict_size(options) + 1);
+    char keybuf[32];
+    const char *host, *port;
+    char *rados_str;
     int i;
 
-    assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED);
-
-    num_entries = qdict_array_entries(options, prefix);
-
-    if (num_entries < 0) {
-        error_setg(errp, "Parse error on RBD QDict array");
-        return NULL;
-    }
-
-    for (i = 0; i < num_entries; i++) {
-        char *strbuf = NULL;
-        const char *value;
-        char *rados_str_tmp;
-
-        str = g_strdup_printf("%s%d.", prefix, i);
-        qdict_extract_subqdict(options, &sub_options, str);
-        g_free(str);
-
-        opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-        qemu_opts_absorb_qdict(opts, sub_options, &local_err);
-        QDECREF(sub_options);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            g_free(rados_str);
+    for (i = 0;; i++) {
+        sprintf(keybuf, "server.%d.host", i);
+        host = qdict_get_try_str(options, keybuf);
+        qdict_del(options, keybuf);
+        sprintf(keybuf, "server.%d.port", i);
+        port = qdict_get_try_str(options, keybuf);
+        qdict_del(options, keybuf);
+        if (!host && !port) {
+            break;
+        }
+        if (!host) {
+            error_setg(errp, "Parameter server.%d.host is missing", i);
             rados_str = NULL;
-            goto exit;
+            goto out;
         }
 
-        if (type == RBD_MON_HOST) {
-            host = qemu_opt_get(opts, "host");
-            port = qemu_opt_get(opts, "port");
-
-            value = host;
-            if (port) {
-                /* check for ipv6 */
-                if (strchr(host, ':')) {
-                    strbuf = g_strdup_printf("[%s]:%s", host, port);
-                } else {
-                    strbuf = g_strdup_printf("%s:%s", host, port);
-                }
-                value = strbuf;
-            } else if (strchr(host, ':')) {
-                strbuf = g_strdup_printf("[%s]", host);
-                value = strbuf;
-            }
+        if (strchr(host, ':')) {
+            vals[i] = port ? g_strdup_printf("[%s]:%s", host, port)
+                : g_strdup_printf("[%s]", host);
         } else {
-            value = qemu_opt_get(opts, "auth");
+            vals[i] = port ? g_strdup_printf("%s:%s", host, port)
+                : g_strdup(host);
         }
-
-
-        /* each iteration in the for loop will build upon the string, and if
-         * rados_str is NULL then it is our first pass */
-        if (rados_str) {
-            /* separate options with ';', as that  is what rados_conf_set()
-             * requires */
-            rados_str_tmp = rados_str;
-            rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value);
-            g_free(rados_str_tmp);
-        } else {
-            rados_str = g_strdup(value);
-        }
-
-        g_free(strbuf);
-        qemu_opts_del(opts);
-        opts = NULL;
     }
+    vals[i] = NULL;
 
-exit:
-    qemu_opts_del(opts);
+    rados_str = i ? g_strjoinv(";", (char **)vals) : NULL;
+out:
+    g_strfreev((char **)vals);
     return rados_str;
 }
 
@@ -670,32 +540,22 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp)
 {
     BDRVRBDState *s = bs->opaque;
-    const char *pool, *snap, *conf, *clientname, *name, *keypairs;
+    const char *pool, *snap, *conf, *user, *image_name, *keypairs;
     const char *secretid;
     QemuOpts *opts;
     Error *local_err = NULL;
     char *mon_host = NULL;
-    char *auth_supported = NULL;
     int r;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
-        qemu_opts_del(opts);
-        return -EINVAL;
-    }
-
-    auth_supported = qemu_rbd_array_opts(options, "auth-supported.",
-                                         RBD_AUTH_SUPPORTED, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
         r = -EINVAL;
         goto failed_opts;
     }
 
-    mon_host = qemu_rbd_array_opts(options, "server.",
-                                   RBD_MON_HOST, &local_err);
+    mon_host = qemu_rbd_mon_host(options, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         r = -EINVAL;
@@ -707,20 +567,24 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     pool           = qemu_opt_get(opts, "pool");
     conf           = qemu_opt_get(opts, "conf");
     snap           = qemu_opt_get(opts, "snapshot");
-    clientname     = qemu_opt_get(opts, "user");
-    name           = qemu_opt_get(opts, "image");
-    keypairs       = qemu_opt_get(opts, "keyvalue-pairs");
+    user           = qemu_opt_get(opts, "user");
+    image_name     = qemu_opt_get(opts, "image");
+    keypairs       = qemu_opt_get(opts, "=keyvalue-pairs");
 
-    r = rados_create(&s->cluster, clientname);
+    if (!pool || !image_name) {
+        error_setg(errp, "Parameters 'pool' and 'image' are required");
+        r = -EINVAL;
+        goto failed_opts;
+    }
+
+    r = rados_create(&s->cluster, user);
     if (r < 0) {
         error_setg_errno(errp, -r, "error initializing");
         goto failed_opts;
     }
 
     s->snap = g_strdup(snap);
-    if (name) {
-        pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name);
-    }
+    s->image_name = g_strdup(image_name);
 
     /* try default location when conf=NULL, but ignore failure */
     r = rados_conf_read_file(s->cluster, conf);
@@ -741,13 +605,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
         }
     }
 
-    if (auth_supported) {
-        r = rados_conf_set(s->cluster, "auth_supported", auth_supported);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
     if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
         r = -EIO;
         goto failed_shutdown;
@@ -778,13 +635,23 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
         goto failed_shutdown;
     }
 
-    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
+    /* rbd_open is always r/w */
+    r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
     if (r < 0) {
-        error_setg_errno(errp, -r, "error reading header from %s", s->name);
+        error_setg_errno(errp, -r, "error reading header from %s",
+                         s->image_name);
         goto failed_open;
     }
 
-    bs->read_only = (s->snap != NULL);
+    /* If we are using an rbd snapshot, we must be r/o, otherwise
+     * leave as-is */
+    if (s->snap != NULL) {
+        r = bdrv_set_read_only(bs, true, &local_err);
+        if (r < 0) {
+            error_propagate(errp, local_err);
+            goto failed_open;
+        }
+    }
 
     qemu_opts_del(opts);
     return 0;
@@ -794,13 +661,33 @@ failed_open:
 failed_shutdown:
     rados_shutdown(s->cluster);
     g_free(s->snap);
+    g_free(s->image_name);
 failed_opts:
     qemu_opts_del(opts);
     g_free(mon_host);
-    g_free(auth_supported);
     return r;
 }
 
+
+/* Since RBD is currently always opened R/W via the API,
+ * we just need to check if we are using a snapshot or not, in
+ * order to determine if we will allow it to be R/W */
+static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRBDState *s = state->bs->opaque;
+    int ret = 0;
+
+    if (s->snap && state->flags & BDRV_O_RDWR) {
+        error_setg(errp,
+                   "Cannot change node '%s' to r/w when using RBD snapshot",
+                   bdrv_get_device_or_node_name(state->bs));
+        ret = -EINVAL;
+    }
+
+    return ret;
+}
+
 static void qemu_rbd_close(BlockDriverState *bs)
 {
     BDRVRBDState *s = bs->opaque;
@@ -808,6 +695,7 @@ static void qemu_rbd_close(BlockDriverState *bs)
     rbd_close(s->image);
     rados_ioctx_destroy(s->io_ctx);
     g_free(s->snap);
+    g_free(s->image_name);
     rados_shutdown(s->cluster);
 }
 
@@ -1206,6 +1094,7 @@ static BlockDriver bdrv_rbd = {
     .bdrv_parse_filename    = qemu_rbd_parse_filename,
     .bdrv_file_open         = qemu_rbd_open,
     .bdrv_close             = qemu_rbd_close,
+    .bdrv_reopen_prepare    = qemu_rbd_reopen_prepare,
     .bdrv_create            = qemu_rbd_create,
     .bdrv_has_zero_init     = bdrv_has_zero_init_1,
     .bdrv_get_info          = qemu_rbd_getinfo,
diff --git a/block/replication.c b/block/replication.c
index bf3c395eb4..d300c15475 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -656,7 +656,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
         s->replication_state = BLOCK_REPLICATION_FAILOVER;
         commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs,
                             BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT,
-                            NULL, replication_done, bs, errp, true);
+                            NULL, replication_done, bs, true, errp);
         break;
     default:
         aio_context_release(aio_context);
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 89e98edab6..b2a5998188 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -13,9 +13,11 @@
  */
 
 #include "qemu/osdep.h"
+#include "qapi-visit.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qint.h"
+#include "qapi/qobject-input-visitor.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
@@ -547,12 +549,53 @@ static SocketAddress *sd_socket_address(const char *path,
     return addr;
 }
 
+static SocketAddress *sd_server_config(QDict *options, Error **errp)
+{
+    QDict *server = NULL;
+    QObject *crumpled_server = NULL;
+    Visitor *iv = NULL;
+    SocketAddressFlat *saddr_flat = NULL;
+    SocketAddress *saddr = NULL;
+    Error *local_err = NULL;
+
+    qdict_extract_subqdict(options, &server, "server.");
+
+    crumpled_server = qdict_crumple(server, errp);
+    if (!crumpled_server) {
+        goto done;
+    }
+
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
+     * server.type=inet.  .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
+    iv = qobject_input_visitor_new(crumpled_server);
+    visit_type_SocketAddressFlat(iv, NULL, &saddr_flat, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto done;
+    }
+
+    saddr = socket_address_crumple(saddr_flat);
+
+done:
+    qapi_free_SocketAddressFlat(saddr_flat);
+    visit_free(iv);
+    qobject_decref(crumpled_server);
+    QDECREF(server);
+    return saddr;
+}
+
 /* Return -EIO in case of error, file descriptor on success */
 static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
 {
     int fd;
 
-    fd = socket_connect(s->addr, errp, NULL, NULL);
+    fd = socket_connect(s->addr, NULL, NULL, errp);
 
     if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) {
         int ret = socket_set_nodelay(fd);
@@ -693,7 +736,7 @@ static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr,
     } else {
         co = qemu_coroutine_create(do_co_req, &srco);
         if (bs) {
-            qemu_coroutine_enter(co);
+            bdrv_coroutine_enter(bs, co);
             BDRV_POLL_WHILE(bs, !srco.finished);
         } else {
             qemu_coroutine_enter(co);
@@ -899,7 +942,7 @@ static void co_read_response(void *opaque)
         s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
     }
 
-    aio_co_wake(s->co_recv);
+    aio_co_enter(s->aio_context, s->co_recv);
 }
 
 static void co_write_request(void *opaque)
@@ -1174,15 +1217,15 @@ static void sd_parse_filename(const char *filename, QDict *options,
         return;
     }
 
-    if (cfg.host) {
-        qdict_set_default_str(options, "host", cfg.host);
-    }
-    if (cfg.port) {
-        snprintf(buf, sizeof(buf), "%d", cfg.port);
-        qdict_set_default_str(options, "port", buf);
-    }
     if (cfg.path) {
-        qdict_set_default_str(options, "path", cfg.path);
+        qdict_set_default_str(options, "server.path", cfg.path);
+        qdict_set_default_str(options, "server.type", "unix");
+    } else {
+        qdict_set_default_str(options, "server.type", "inet");
+        qdict_set_default_str(options, "server.host",
+                              cfg.host ?: SD_DEFAULT_ADDR);
+        snprintf(buf, sizeof(buf), "%d", cfg.port ?: SD_DEFAULT_PORT);
+        qdict_set_default_str(options, "server.port", buf);
     }
     qdict_set_default_str(options, "vdi", cfg.vdi);
     qdict_set_default_str(options, "tag", cfg.tag);
@@ -1510,18 +1553,6 @@ static QemuOptsList runtime_opts = {
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
     .desc = {
         {
-            .name = "host",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "port",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "path",
-            .type = QEMU_OPT_STRING,
-        },
-        {
             .name = "vdi",
             .type = QEMU_OPT_STRING,
         },
@@ -1543,7 +1574,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     int ret, fd;
     uint32_t vid = 0;
     BDRVSheepdogState *s = bs->opaque;
-    const char *host, *port, *path, *vdi, *snap_id_str, *tag;
+    const char *vdi, *snap_id_str, *tag;
     uint64_t snap_id;
     char *buf = NULL;
     QemuOpts *opts;
@@ -1560,20 +1591,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
         goto err_no_fd;
     }
 
-    host = qemu_opt_get(opts, "host");
-    port = qemu_opt_get(opts, "port");
-    path = qemu_opt_get(opts, "path");
+    s->addr = sd_server_config(options, errp);
+    if (!s->addr) {
+        ret = -EINVAL;
+        goto err_no_fd;
+    }
+
     vdi = qemu_opt_get(opts, "vdi");
     snap_id_str = qemu_opt_get(opts, "snap-id");
     snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID);
     tag = qemu_opt_get(opts, "tag");
 
-    if ((host || port) && path) {
-        error_setg(errp, "can't use 'path' together with 'host' or 'port'");
-        ret = -EINVAL;
-        goto err_no_fd;
-    }
-
     if (!vdi) {
         error_setg(errp, "parameter 'vdi' is missing");
         ret = -EINVAL;
@@ -1604,8 +1632,6 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
         goto err_no_fd;
     }
 
-    s->addr = sd_socket_address(path, host, port);
-
     QLIST_INIT(&s->inflight_aio_head);
     QLIST_INIT(&s->failed_aio_head);
     QLIST_INIT(&s->inflight_aiocb_head);
diff --git a/block/snapshot.c b/block/snapshot.c
index bf5c2ca5e1..06b1185d27 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -27,6 +27,7 @@
 #include "block/block_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qstring.h"
 
 QemuOptsList internal_snapshot_opts = {
     .name = "snapshot",
@@ -189,14 +190,33 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
     }
 
     if (bs->file) {
+        BlockDriverState *file;
+        QDict *options = qdict_clone_shallow(bs->options);
+        QDict *file_options;
+
+        file = bs->file->bs;
+        /* Prevent it from getting deleted when detached from bs */
+        bdrv_ref(file);
+
+        qdict_extract_subqdict(options, &file_options, "file.");
+        QDECREF(file_options);
+        qdict_put(options, "file", qstring_from_str(bdrv_get_node_name(file)));
+
         drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
-        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
+        bdrv_unref_child(bs, bs->file);
+        bs->file = NULL;
+
+        ret = bdrv_snapshot_goto(file, snapshot_id);
+        open_ret = drv->bdrv_open(bs, options, bs->open_flags, NULL);
+        QDECREF(options);
         if (open_ret < 0) {
-            bdrv_unref(bs->file->bs);
+            bdrv_unref(file);
             bs->drv = NULL;
             return open_ret;
         }
+
+        assert(bs->file->bs == file);
+        bdrv_unref(file);
         return ret;
     }
 
diff --git a/block/ssh.c b/block/ssh.c
index 278e66faa6..df09f6c5ba 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -601,6 +601,14 @@ static InetSocketAddress *ssh_config(QDict *options, Error **errp)
         goto out;
     }
 
+    /*
+     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive.
+     * .to doesn't matter, it's ignored anyway.
+     * That's because when @options come from -blockdev or
+     * blockdev_add, members are typed according to the QAPI schema,
+     * but when they come from -drive, they're all QString.  The
+     * visitor expects the former.
+     */
     iv = qobject_input_visitor_new(crumpled_addr);
     visit_type_InetSocketAddress(iv, NULL, &inet, &local_error);
     if (local_error) {
@@ -673,7 +681,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
     }
 
     /* Open the socket and connect. */
-    s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL);
+    s->sock = inet_connect_saddr(s->inet, NULL, NULL, errp);
     if (s->sock < 0) {
         ret = -EIO;
         goto err;
diff --git a/block/trace-events b/block/trace-events
index 0bc5c0adf1..9a71c7fb04 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -110,3 +110,20 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+
+# block/vxhs.c
+vxhs_iio_callback(int error) "ctx is NULL: error %d"
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. Error = %d, errno = %d"
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
+vxhs_open_vdiskid(const char *vdisk_id) "Opening vdisk-id %s"
+vxhs_open_hostinfo(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
+vxhs_open_iio_open(const char *host) "Failed to connect to storage agent on host %s"
+vxhs_parse_uri_hostinfo(char *host, int port) "Host: IP %s, Port %d"
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
+vxhs_get_creds(const char *cacert, const char *client_key, const char *client_cert) "cacert %s, client_key %s, client_cert %s"
diff --git a/block/vvfat.c b/block/vvfat.c
index af5153d27d..b509d55642 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1156,8 +1156,6 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
 
     s->current_cluster=0xffffffff;
 
-    /* read only is the default for safety */
-    bs->read_only = true;
     s->qcow = NULL;
     s->qcow_filename = NULL;
     s->fat2 = NULL;
@@ -1169,11 +1167,24 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
     s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
 
     if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(bs, errp);
+        if (!bdrv_is_read_only(bs)) {
+            ret = enable_write_target(bs, errp);
+            if (ret < 0) {
+                goto fail;
+            }
+        } else {
+            ret = -EPERM;
+            error_setg(errp,
+                       "Unable to set VVFAT to 'rw' when drive is read-only");
+            goto fail;
+        }
+    } else  {
+        /* read only is the default for safety */
+        ret = bdrv_set_read_only(bs, true, &local_err);
         if (ret < 0) {
+            error_propagate(errp, local_err);
             goto fail;
         }
-        bs->read_only = false;
     }
 
     bs->total_sectors = cyls * heads * secs;
diff --git a/block/vxhs.c b/block/vxhs.c
new file mode 100644
index 0000000000..9ffe9d3814
--- /dev/null
+++ b/block/vxhs.c
@@ -0,0 +1,575 @@
+/*
+ * QEMU Block driver for Veritas HyperScale (VxHS)
+ *
+ * Copyright (c) 2017 Veritas Technologies LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <qnio/qnio_api.h>
+#include <sys/param.h>
+#include "block/block_int.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/uuid.h"
+#include "crypto/tlscredsx509.h"
+
+#define VXHS_OPT_FILENAME           "filename"
+#define VXHS_OPT_VDISK_ID           "vdisk-id"
+#define VXHS_OPT_SERVER             "server"
+#define VXHS_OPT_HOST               "host"
+#define VXHS_OPT_PORT               "port"
+
+/* Only accessed under QEMU global mutex */
+static uint32_t vxhs_ref;
+
+typedef enum {
+    VDISK_AIO_READ,
+    VDISK_AIO_WRITE,
+} VDISKAIOCmd;
+
+/*
+ * HyperScale AIO callbacks structure
+ */
+typedef struct VXHSAIOCB {
+    BlockAIOCB common;
+    int err;
+} VXHSAIOCB;
+
+typedef struct VXHSvDiskHostsInfo {
+    void *dev_handle; /* Device handle */
+    char *host; /* Host name or IP */
+    int port; /* Host's port number */
+} VXHSvDiskHostsInfo;
+
+/*
+ * Structure per vDisk maintained for state
+ */
+typedef struct BDRVVXHSState {
+    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
+    char *vdisk_guid;
+    char *tlscredsid; /* tlscredsid */
+} BDRVVXHSState;
+
+static void vxhs_complete_aio_bh(void *opaque)
+{
+    VXHSAIOCB *acb = opaque;
+    BlockCompletionFunc *cb = acb->common.cb;
+    void *cb_opaque = acb->common.opaque;
+    int ret = 0;
+
+    if (acb->err != 0) {
+        trace_vxhs_complete_aio(acb, acb->err);
+        ret = (-EIO);
+    }
+
+    qemu_aio_unref(acb);
+    cb(cb_opaque, ret);
+}
+
+/*
+ * Called from a libqnio thread
+ */
+static void vxhs_iio_callback(void *ctx, uint32_t opcode, uint32_t error)
+{
+    VXHSAIOCB *acb = NULL;
+
+    switch (opcode) {
+    case IRP_READ_REQUEST:
+    case IRP_WRITE_REQUEST:
+
+        /*
+         * ctx is VXHSAIOCB*
+         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP
+         */
+        if (ctx) {
+            acb = ctx;
+        } else {
+            trace_vxhs_iio_callback(error);
+            goto out;
+        }
+
+        if (error) {
+            if (!acb->err) {
+                acb->err = error;
+            }
+            trace_vxhs_iio_callback(error);
+        }
+
+        aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
+                                vxhs_complete_aio_bh, acb);
+        break;
+
+    default:
+        if (error == QNIOERROR_HUP) {
+            /*
+             * Channel failed, spontaneous notification,
+             * not in response to I/O
+             */
+            trace_vxhs_iio_callback_chnfail(error, errno);
+        } else {
+            trace_vxhs_iio_callback_unknwn(opcode, error);
+        }
+        break;
+    }
+out:
+    return;
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "vxhs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URI to the Veritas HyperScale image",
+        },
+        {
+            .name = VXHS_OPT_VDISK_ID,
+            .type = QEMU_OPT_STRING,
+            .help = "UUID of the VxHS vdisk",
+        },
+        {
+            .name = "tls-creds",
+            .type = QEMU_OPT_STRING,
+            .help = "ID of the TLS/SSL credentials to use",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "vxhs_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (ipv4 addresses)",
+        },
+        {
+            .name = VXHS_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which VxHSD is listening (default 9999)",
+            .def_value_str = "9999"
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse incoming URI and populate *options with the host
+ * and device information
+ */
+static int vxhs_parse_uri(const char *filename, QDict *options)
+{
+    URI *uri = NULL;
+    char *port;
+    int ret = 0;
+
+    trace_vxhs_parse_uri_filename(filename);
+    uri = uri_parse(filename);
+    if (!uri || !uri->server || !uri->path) {
+        uri_free(uri);
+        return -EINVAL;
+    }
+
+    qdict_put(options, VXHS_OPT_SERVER".host", qstring_from_str(uri->server));
+
+    if (uri->port) {
+        port = g_strdup_printf("%d", uri->port);
+        qdict_put(options, VXHS_OPT_SERVER".port", qstring_from_str(port));
+        g_free(port);
+    }
+
+    qdict_put(options, "vdisk-id", qstring_from_str(uri->path));
+
+    trace_vxhs_parse_uri_hostinfo(uri->server, uri->port);
+    uri_free(uri);
+
+    return ret;
+}
+
+static void vxhs_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    if (qdict_haskey(options, "vdisk-id") || qdict_haskey(options, "server")) {
+        error_setg(errp, "vdisk-id/server and a file name may not be specified "
+                         "at the same time");
+        return;
+    }
+
+    if (strstr(filename, "://")) {
+        int ret = vxhs_parse_uri(filename, options);
+        if (ret < 0) {
+            error_setg(errp, "Invalid URI. URI should be of the form "
+                       "  vxhs://<host_ip>:<port>/<vdisk-id>");
+        }
+    }
+}
+
+static int vxhs_init_and_ref(void)
+{
+    if (vxhs_ref++ == 0) {
+        if (iio_init(QNIO_VERSION, vxhs_iio_callback)) {
+            return -ENODEV;
+        }
+    }
+    return 0;
+}
+
+static void vxhs_unref(void)
+{
+    if (--vxhs_ref == 0) {
+        iio_fini();
+    }
+}
+
+static void vxhs_get_tls_creds(const char *id, char **cacert,
+                               char **key, char **cert, Error **errp)
+{
+    Object *obj;
+    QCryptoTLSCreds *creds;
+    QCryptoTLSCredsX509 *creds_x509;
+
+    obj = object_resolve_path_component(
+        object_get_objects_root(), id);
+
+    if (!obj) {
+        error_setg(errp, "No TLS credentials with id '%s'",
+                   id);
+        return;
+    }
+
+    creds_x509 = (QCryptoTLSCredsX509 *)
+        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS_X509);
+
+    if (!creds_x509) {
+        error_setg(errp, "Object with id '%s' is not TLS credentials",
+                   id);
+        return;
+    }
+
+    creds = &creds_x509->parent_obj;
+
+    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+        error_setg(errp,
+                   "Expecting TLS credentials with a client endpoint");
+        return;
+    }
+
+    /*
+     * Get the cacert, client_cert and client_key file names.
+     */
+    if (!creds->dir) {
+        error_setg(errp, "TLS object missing 'dir' property value");
+        return;
+    }
+
+    *cacert = g_strdup_printf("%s/%s", creds->dir,
+                              QCRYPTO_TLS_CREDS_X509_CA_CERT);
+    *cert = g_strdup_printf("%s/%s", creds->dir,
+                            QCRYPTO_TLS_CREDS_X509_CLIENT_CERT);
+    *key = g_strdup_printf("%s/%s", creds->dir,
+                           QCRYPTO_TLS_CREDS_X509_CLIENT_KEY);
+}
+
+static int vxhs_open(BlockDriverState *bs, QDict *options,
+                     int bdrv_flags, Error **errp)
+{
+    BDRVVXHSState *s = bs->opaque;
+    void *dev_handlep;
+    QDict *backing_options = NULL;
+    QemuOpts *opts = NULL;
+    QemuOpts *tcp_opts = NULL;
+    char *of_vsa_addr = NULL;
+    Error *local_err = NULL;
+    const char *vdisk_id_opt;
+    const char *server_host_opt;
+    int ret = 0;
+    char *cacert = NULL;
+    char *client_key = NULL;
+    char *client_cert = NULL;
+
+    ret = vxhs_init_and_ref();
+    if (ret < 0) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Create opts info from runtime_opts and runtime_tcp_opts list */
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id is the disk UUID */
+    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
+    if (!vdisk_id_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* vdisk-id may contain a leading '/' */
+    if (strlen(vdisk_id_opt) > UUID_FMT_LEN + 1) {
+        error_setg(&local_err, "vdisk-id cannot be more than %d characters",
+                   UUID_FMT_LEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->vdisk_guid = g_strdup(vdisk_id_opt);
+    trace_vxhs_open_vdiskid(vdisk_id_opt);
+
+    /* get the 'server.' arguments */
+    qdict_extract_subqdict(options, &backing_options, VXHS_OPT_SERVER".");
+
+    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
+    if (local_err != NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
+    if (!server_host_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER,
+                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (strlen(server_host_opt) > MAXHOSTNAMELEN) {
+        error_setg(&local_err, "server.host cannot be more than %d characters",
+                   MAXHOSTNAMELEN);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* check if we got tls-creds via the --object argument */
+    s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+    if (s->tlscredsid) {
+        vxhs_get_tls_creds(s->tlscredsid, &cacert, &client_key,
+                           &client_cert, &local_err);
+        if (local_err != NULL) {
+            ret = -EINVAL;
+            goto out;
+        }
+        trace_vxhs_get_creds(cacert, client_key, client_cert);
+    }
+
+    s->vdisk_hostinfo.host = g_strdup(server_host_opt);
+    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
+                                                          VXHS_OPT_PORT),
+                                                          NULL, 0);
+
+    trace_vxhs_open_hostinfo(s->vdisk_hostinfo.host,
+                             s->vdisk_hostinfo.port);
+
+    of_vsa_addr = g_strdup_printf("of://%s:%d",
+                                  s->vdisk_hostinfo.host,
+                                  s->vdisk_hostinfo.port);
+
+    /*
+     * Open qnio channel to storage agent if not opened before
+     */
+    dev_handlep = iio_open(of_vsa_addr, s->vdisk_guid, 0,
+                           cacert, client_key, client_cert);
+    if (dev_handlep == NULL) {
+        trace_vxhs_open_iio_open(of_vsa_addr);
+        ret = -ENODEV;
+        goto out;
+    }
+    s->vdisk_hostinfo.dev_handle = dev_handlep;
+
+out:
+    g_free(of_vsa_addr);
+    QDECREF(backing_options);
+    qemu_opts_del(tcp_opts);
+    qemu_opts_del(opts);
+    g_free(cacert);
+    g_free(client_key);
+    g_free(client_cert);
+
+    if (ret < 0) {
+        vxhs_unref();
+        error_propagate(errp, local_err);
+        g_free(s->vdisk_hostinfo.host);
+        g_free(s->vdisk_guid);
+        g_free(s->tlscredsid);
+        s->vdisk_guid = NULL;
+    }
+
+    return ret;
+}
+
+static const AIOCBInfo vxhs_aiocb_info = {
+    .aiocb_size = sizeof(VXHSAIOCB)
+};
+
+/*
+ * This allocates QEMU-VXHS callback for each IO
+ * and is passed to QNIO. When QNIO completes the work,
+ * it will be passed back through the callback.
+ */
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
+                               QEMUIOVector *qiov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque,
+                               VDISKAIOCmd iodir)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = bs->opaque;
+    size_t size;
+    uint64_t offset;
+    int iio_flags = 0;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
+
+    /*
+     * Initialize VXHSAIOCB.
+     */
+    acb->err = 0;
+
+    iio_flags = IIO_FLAG_ASYNC;
+
+    switch (iodir) {
+    case VDISK_AIO_WRITE:
+            ret = iio_writev(dev_handle, acb, qiov->iov, qiov->niov,
+                             offset, (uint64_t)size, iio_flags);
+            break;
+    case VDISK_AIO_READ:
+            ret = iio_readv(dev_handle, acb, qiov->iov, qiov->niov,
+                            offset, (uint64_t)size, iio_flags);
+            break;
+    default:
+            trace_vxhs_aio_rw_invalid(iodir);
+            goto errout;
+    }
+
+    if (ret != 0) {
+        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
+                                acb, ret, errno);
+        goto errout;
+    }
+    return &acb->common;
+
+errout:
+    qemu_aio_unref(acb);
+    return NULL;
+}
+
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                       opaque, VDISK_AIO_READ);
+}
+
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
+                       cb, opaque, VDISK_AIO_WRITE);
+}
+
+static void vxhs_close(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    trace_vxhs_close(s->vdisk_guid);
+
+    g_free(s->vdisk_guid);
+    s->vdisk_guid = NULL;
+
+    /*
+     * Close vDisk device
+     */
+    if (s->vdisk_hostinfo.dev_handle) {
+        iio_close(s->vdisk_hostinfo.dev_handle);
+        s->vdisk_hostinfo.dev_handle = NULL;
+    }
+
+    vxhs_unref();
+
+    /*
+     * Free the dynamically allocated host string etc
+     */
+    g_free(s->vdisk_hostinfo.host);
+    g_free(s->tlscredsid);
+    s->tlscredsid = NULL;
+    s->vdisk_hostinfo.host = NULL;
+    s->vdisk_hostinfo.port = 0;
+}
+
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
+{
+    int64_t vdisk_size = -1;
+    int ret = 0;
+    void *dev_handle = s->vdisk_hostinfo.dev_handle;
+
+    ret = iio_ioctl(dev_handle, IOR_VDISK_STAT, &vdisk_size, 0);
+    if (ret < 0) {
+        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
+        return -EIO;
+    }
+
+    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
+    return vdisk_size;
+}
+
+/*
+ * Returns the size of vDisk in bytes. This is required
+ * by QEMU block upper block layer so that it is visible
+ * to guest.
+ */
+static int64_t vxhs_getlength(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+    int64_t vdisk_size;
+
+    vdisk_size = vxhs_get_vdisk_stat(s);
+    if (vdisk_size < 0) {
+        return -EIO;
+    }
+
+    return vdisk_size;
+}
+
+static BlockDriver bdrv_vxhs = {
+    .format_name                  = "vxhs",
+    .protocol_name                = "vxhs",
+    .instance_size                = sizeof(BDRVVXHSState),
+    .bdrv_file_open               = vxhs_open,
+    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_close                   = vxhs_close,
+    .bdrv_getlength               = vxhs_getlength,
+    .bdrv_aio_readv               = vxhs_aio_readv,
+    .bdrv_aio_writev              = vxhs_aio_writev,
+};
+
+static void bdrv_vxhs_init(void)
+{
+    bdrv_register(&bdrv_vxhs);
+}
+
+block_init(bdrv_vxhs_init);
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 7ea836b46e..8a11807df3 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -124,6 +124,7 @@ void qmp_nbd_server_start(SocketAddress *addr,
             goto error;
         }
 
+        /* TODO SOCKET_ADDRESS_KIND_FD where fd has AF_INET or AF_INET6 */
         if (addr->type != SOCKET_ADDRESS_KIND_INET) {
             error_setg(errp, "TLS is only supported with IPv4/IPv6");
             goto error;
diff --git a/blockdev.c b/blockdev.c
index c5b2c2c209..64282065d8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1728,7 +1728,7 @@ static void external_snapshot_prepare(BlkActionState *common,
             bdrv_img_create(new_image_file, format,
                             state->old_bs->filename,
                             state->old_bs->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
             if (local_err) {
                 error_propagate(errp, local_err);
                 return;
@@ -1772,6 +1772,8 @@ static void external_snapshot_prepare(BlkActionState *common,
         return;
     }
 
+    bdrv_set_aio_context(state->new_bs, state->aio_context);
+
     /* This removes our old bs and adds the new bs. This is an operation that
      * can fail, so we need to do it in .prepare; undoing it for abort is
      * always possible. */
@@ -1789,8 +1791,6 @@ static void external_snapshot_commit(BlkActionState *common)
     ExternalSnapshotState *state =
                              DO_UPCAST(ExternalSnapshotState, common, common);
 
-    bdrv_set_aio_context(state->new_bs, state->aio_context);
-
     /* We don't need (or want) to use the transactional
      * bdrv_reopen_multiple() across all the entries at once, because we
      * don't want to abort all of them if one of them fails the reopen */
@@ -2835,7 +2835,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
 
     bs = bdrv_find_node(id);
     if (bs) {
-        qmp_x_blockdev_del(id, &local_err);
+        qmp_blockdev_del(id, &local_err);
         if (local_err) {
             error_report_err(local_err);
         }
@@ -3142,7 +3142,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
         }
         commit_active_start(has_job_id ? job_id : NULL, bs, base_bs,
                             BLOCK_JOB_DEFAULT, speed, on_error,
-                            filter_node_name, NULL, NULL, &local_err, false);
+                            filter_node_name, NULL, NULL, false, &local_err);
     } else {
         BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
         if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
@@ -3237,10 +3237,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
         if (source) {
             bdrv_img_create(backup->target, backup->format, source->filename,
                             source->drv->format_name, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
         } else {
             bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
-                            size, flags, &local_err, false);
+                            size, flags, false, &local_err);
         }
     }
 
@@ -3531,7 +3531,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         /* create new image w/o backing file */
         assert(format);
         bdrv_img_create(arg->target, format,
-                        NULL, NULL, NULL, size, flags, &local_err, false);
+                        NULL, NULL, NULL, size, flags, false, &local_err);
     } else {
         switch (arg->mode) {
         case NEW_IMAGE_MODE_EXISTING:
@@ -3541,7 +3541,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
             bdrv_img_create(arg->target, format,
                             source->filename,
                             source->drv->format_name,
-                            NULL, size, flags, &local_err, false);
+                            NULL, size, flags, false, &local_err);
             break;
         default:
             abort();
@@ -3900,7 +3900,7 @@ fail:
     visit_free(v);
 }
 
-void qmp_x_blockdev_del(const char *node_name, Error **errp)
+void qmp_blockdev_del(const char *node_name, Error **errp)
 {
     AioContext *aio_context;
     BlockDriverState *bs;
diff --git a/blockjob.c b/blockjob.c
index 69126af97f..6e489327ff 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -68,6 +68,23 @@ static const BdrvChildRole child_job = {
     .stay_at_node       = true,
 };
 
+static void block_job_drained_begin(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_pause(job);
+}
+
+static void block_job_drained_end(void *opaque)
+{
+    BlockJob *job = opaque;
+    block_job_resume(job);
+}
+
+static const BlockDevOps block_job_dev_ops = {
+    .drained_begin = block_job_drained_begin,
+    .drained_end = block_job_drained_end,
+};
+
 BlockJob *block_job_next(BlockJob *job)
 {
     if (!job) {
@@ -205,11 +222,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     }
 
     job = g_malloc0(driver->instance_size);
-    error_setg(&job->blocker, "block device is in use by block job: %s",
-               BlockJobType_lookup[driver->job_type]);
-    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
-    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
-
     job->driver        = driver;
     job->id            = g_strdup(job_id);
     job->blk           = blk;
@@ -219,8 +231,15 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     job->paused        = true;
     job->pause_count   = 1;
     job->refcnt        = 1;
+
+    error_setg(&job->blocker, "block device is in use by block job: %s",
+               BlockJobType_lookup[driver->job_type]);
+    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
     bs->job = job;
 
+    blk_set_dev_ops(blk, &block_job_dev_ops, job);
+    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
+
     QLIST_INSERT_HEAD(&block_jobs, job, job_list);
 
     blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
@@ -250,16 +269,28 @@ static bool block_job_started(BlockJob *job)
     return job->co;
 }
 
+/**
+ * All jobs must allow a pause point before entering their job proper. This
+ * ensures that jobs can be paused prior to being started, then resumed later.
+ */
+static void coroutine_fn block_job_co_entry(void *opaque)
+{
+    BlockJob *job = opaque;
+
+    assert(job && job->driver && job->driver->start);
+    block_job_pause_point(job);
+    job->driver->start(job);
+}
+
 void block_job_start(BlockJob *job)
 {
     assert(job && !block_job_started(job) && job->paused &&
-           !job->busy && job->driver->start);
-    job->co = qemu_coroutine_create(job->driver->start, job);
-    if (--job->pause_count == 0) {
-        job->paused = false;
-        job->busy = true;
-        qemu_coroutine_enter(job->co);
-    }
+           job->driver && job->driver->start);
+    job->co = qemu_coroutine_create(block_job_co_entry, job);
+    job->pause_count--;
+    job->busy = true;
+    job->paused = false;
+    bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }
 
 void block_job_ref(BlockJob *job)
@@ -501,7 +532,7 @@ void block_job_user_resume(BlockJob *job)
 void block_job_enter(BlockJob *job)
 {
     if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co);
+        bdrv_coroutine_enter(blk_bs(job->blk), job->co);
     }
 }
 
@@ -755,12 +786,16 @@ static void block_job_defer_to_main_loop_bh(void *opaque)
 
     /* Fetch BDS AioContext again, in case it has changed */
     aio_context = blk_get_aio_context(data->job->blk);
-    aio_context_acquire(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_acquire(aio_context);
+    }
 
     data->job->deferred_to_main_loop = false;
     data->fn(data->job, data->opaque);
 
-    aio_context_release(aio_context);
+    if (aio_context != data->aio_context) {
+        aio_context_release(aio_context);
+    }
 
     aio_context_release(data->aio_context);
 
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 1ad018a127..7f2018ede0 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -24,8 +24,7 @@
 
 //#define DEBUG_MMAP
 
-#if defined(CONFIG_USE_NPTL)
-pthread_mutex_t mmap_mutex;
+static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int __thread mmap_lock_count;
 
 void mmap_lock(void)
@@ -62,16 +61,6 @@ void mmap_fork_end(int child)
     else
         pthread_mutex_unlock(&mmap_mutex);
 }
-#else
-/* We aren't threadsafe to start with, so no need to worry about locking.  */
-void mmap_lock(void)
-{
-}
-
-void mmap_unlock(void)
-{
-}
-#endif
 
 /* NOTE: all the constants are the HOST ones, but addresses are target. */
 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 2b2b9184e0..b550cee0cb 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -209,10 +209,8 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                        abi_ulong new_addr);
 int target_msync(abi_ulong start, abi_ulong len, int flags);
 extern unsigned long last_brk;
-#if defined(CONFIG_USE_NPTL)
 void mmap_fork_start(void);
 void mmap_fork_end(int child);
-#endif
 
 /* main.c */
 extern unsigned long x86_stack_size;
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index d7e92e1bd3..36ab0d633a 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -47,7 +47,6 @@ typedef struct {
     int max_size;
     int do_telnetopt;
     int do_nodelay;
-    int is_unix;
     int *read_msgfds;
     size_t read_msgfds_num;
     int *write_msgfds;
@@ -358,6 +357,10 @@ static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr,
         return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd.data->str,
                                is_listen ? ",server" : "");
         break;
+    case SOCKET_ADDRESS_KIND_VSOCK:
+        return g_strdup_printf("%svsock:%s:%s", prefix,
+                               addr->u.vsock.data->cid,
+                               addr->u.vsock.data->port);
     default:
         abort();
     }
@@ -825,7 +828,6 @@ static void qmp_chardev_open_socket(Chardev *chr,
     int64_t reconnect   = sock->has_reconnect ? sock->reconnect : 0;
     QIOChannelSocket *sioc = NULL;
 
-    s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX;
     s->is_listen = is_listen;
     s->is_telnet = is_telnet;
     s->do_nodelay = do_nodelay;
@@ -865,7 +867,8 @@ static void qmp_chardev_open_socket(Chardev *chr,
     s->addr = QAPI_CLONE(SocketAddress, sock->addr);
 
     qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE);
-    if (s->is_unix) {
+    /* TODO SOCKET_ADDRESS_FD where fd has AF_UNIX */
+    if (addr->type == SOCKET_ADDRESS_KIND_UNIX) {
         qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_FD_PASS);
     }
 
diff --git a/configure b/configure
index 3133ef8418..c35acf1192 100755
--- a/configure
+++ b/configure
@@ -320,9 +320,11 @@ numa=""
 tcmalloc="no"
 jemalloc="no"
 replication="yes"
+vxhs=""
 
 supported_cpu="no"
 supported_os="no"
+bogus_os="no"
 
 # parse CC options first
 for opt do
@@ -520,11 +522,11 @@ ARCH=
 # Normalise host CPU name and set ARCH.
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
-  ppc|ppc64|s390|s390x|x32)
+  ppc|ppc64|s390|s390x|sparc64|x32)
     cpu="$cpu"
     supported_cpu="yes"
   ;;
-  ia64|sparc64)
+  ia64)
     cpu="$cpu"
   ;;
   i386|i486|i586|i686|i86pc|BePC)
@@ -549,6 +551,7 @@ case "$cpu" in
   ;;
   sparc|sun4[cdmuv])
     cpu="sparc"
+    supported_cpu="yes"
   ;;
   *)
     # This will result in either an error or falling back to TCI later
@@ -694,7 +697,10 @@ Linux)
   supported_os="yes"
 ;;
 *)
-  error_exit "Unsupported host OS $targetos"
+  # This is a fatal error, but don't report it yet, because we
+  # might be going to just print the --help text, or it might
+  # be the result of a missing compiler.
+  bogus_os="yes"
 ;;
 esac
 
@@ -1178,6 +1184,10 @@ for opt do
   ;;
   --enable-replication) replication="yes"
   ;;
+  --disable-vxhs) vxhs="no"
+  ;;
+  --enable-vxhs) vxhs="yes"
+  ;;
   *)
       echo "ERROR: unknown option $opt"
       echo "Try '$0 --help' for more information"
@@ -1186,21 +1196,6 @@ for opt do
   esac
 done
 
-if ! has $python; then
-  error_exit "Python not found. Use --python=/path/to/python"
-fi
-
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
-  error_exit "Cannot use '$python', Python 2.6 or later is required." \
-      "Note that Python 3 or later is not yet supported." \
-      "Use --python=/path/to/python to specify a supported Python."
-fi
-
-# Suppress writing compiled files
-python="$python -B"
-
 case "$cpu" in
     ppc)
            CPU_CFLAGS="-m32"
@@ -1275,6 +1270,9 @@ for config in $mak_wilds; do
     default_target_list="${default_target_list} $(basename "$config" .mak)"
 done
 
+# Enumerate public trace backends for --help output
+trace_backend_list=$(echo $(grep -le '^PUBLIC = True$' scripts/tracetool/backend/*.py | sed -e 's/^.*\/\(.*\)\.py$/\1/'))
+
 if test x"$show_help" = x"yes" ; then
 cat << EOF
 
@@ -1328,7 +1326,7 @@ Advanced options (experts only):
                            set block driver read-only whitelist
                            (affects only QEMU, not qemu-img)
   --enable-trace-backends=B Set trace backend
-                           Available backends: $($python $source_path/scripts/tracetool.py --list-backends)
+                           Available backends: $trace_backend_list
   --with-trace-file=NAME   Full PATH,NAME of file to store traces
                            Default:trace-<pid>
   --disable-slirp          disable SLIRP userspace network connectivity
@@ -1422,12 +1420,28 @@ disabled with --disable-FEATURE, default is enabled if available:
   xfsctl          xfsctl support
   qom-cast-debug  cast debugging support
   tools           build qemu-io, qemu-nbd and qemu-image tools
+  vxhs            Veritas HyperScale vDisk backend support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
 exit 0
 fi
 
+if ! has $python; then
+  error_exit "Python not found. Use --python=/path/to/python"
+fi
+
+# Note that if the Python conditional here evaluates True we will exit
+# with status 1 which is a shell 'false' value.
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
+  error_exit "Cannot use '$python', Python 2.6 or later is required." \
+      "Note that Python 3 or later is not yet supported." \
+      "Use --python=/path/to/python to specify a supported Python."
+fi
+
+# Suppress writing compiled files
+python="$python -B"
+
 # Now we have handled --enable-tcg-interpreter and know we're not just
 # printing the help message, bail out if the host CPU isn't supported.
 if test "$ARCH" = "unknown"; then
@@ -1460,6 +1474,14 @@ if ! compile_prog ; then
     error_exit "\"$cc\" cannot build an executable (is your linker broken?)"
 fi
 
+if test "$bogus_os" = "yes"; then
+    # Now that we know that we're not printing the help and that
+    # the compiler works (so the results of the check_defines we used
+    # to identify the OS are reliable), if we didn't recognize the
+    # host OS we should stop now.
+    error_exit "Unrecognized host OS $targetos"
+fi
+
 # Check that the C++ compiler exists and works with the C compiler
 if has $cxx; then
     cat > $TMPC <<EOF
@@ -3096,12 +3118,23 @@ fi
 ##########################################
 # glib support probe
 
-glib_req_ver=2.22
+if test "$mingw32" = yes; then
+    glib_req_ver=2.30
+else
+    glib_req_ver=2.22
+fi
 glib_modules=gthread-2.0
 if test "$modules" = yes; then
     glib_modules="$glib_modules gmodule-2.0"
 fi
 
+# This workaround is required due to a bug in pkg-config file for glib as it
+# doesn't define GLIB_STATIC_COMPILATION for pkg-config --static
+
+if test "$static" = yes -a "$mingw32" = yes; then
+    QEMU_CFLAGS="-DGLIB_STATIC_COMPILATION $QEMU_CFLAGS"
+fi
+
 for i in $glib_modules; do
     if $pkg_config --atleast-version=$glib_req_ver $i; then
         glib_cflags=$($pkg_config --cflags $i)
@@ -4793,6 +4826,33 @@ if compile_prog "" "" ; then
 fi
 
 ##########################################
+# Veritas HyperScale block driver VxHS
+# Check if libvxhs is installed
+
+if test "$vxhs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <stdint.h>
+#include <qnio/qnio_api.h>
+
+void *vxhs_callback;
+
+int main(void) {
+    iio_init(QNIO_VERSION, vxhs_callback);
+    return 0;
+}
+EOF
+  vxhs_libs="-lvxhs -lssl"
+  if compile_prog "" "$vxhs_libs" ; then
+    vxhs=yes
+  else
+    if test "$vxhs" = "yes" ; then
+      feature_not_found "vxhs block device" "Install libvxhs See github"
+    fi
+    vxhs=no
+  fi
+fi
+
+##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
 
@@ -5158,6 +5218,7 @@ echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
+echo "VxHS block device $vxhs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5180,8 +5241,8 @@ if test "$supported_os" = "no"; then
     echo
     echo "WARNING: SUPPORT FOR THIS HOST OS WILL GO AWAY IN FUTURE RELEASES!"
     echo
-    echo "CPU host OS $targetos support is not currently maintained."
-    echo "The QEMU project intends to remove support for this host CPU in"
+    echo "Host OS $targetos support is not currently maintained."
+    echo "The QEMU project intends to remove support for this host OS in"
     echo "a future release if nobody volunteers to maintain it and to"
     echo "provide a build host for our continuous integration setup."
     echo "configure has succeeded and you can continue to build, but"
@@ -5797,6 +5858,11 @@ if test "$pthread_setname_np" = "yes" ; then
   echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
 fi
 
+if test "$vxhs" = "yes" ; then
+  echo "CONFIG_VXHS=y" >> $config_host_mak
+  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
   QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
diff --git a/cpu-exec-common.c b/cpu-exec-common.c
index 0504a9457b..e81da276bb 100644
--- a/cpu-exec-common.c
+++ b/cpu-exec-common.c
@@ -35,7 +35,7 @@ void cpu_loop_exit_noexc(CPUState *cpu)
 #if defined(CONFIG_SOFTMMU)
 void cpu_reloading_memory_map(void)
 {
-    if (qemu_in_vcpu_thread()) {
+    if (qemu_in_vcpu_thread() && current_cpu->running) {
         /* The guest can in theory prolong the RCU critical section as long
          * as it feels like. The major problem with this is that because it
          * can do multiple reconfigurations of the memory map within the
diff --git a/cpu-exec.c b/cpu-exec.c
index 748cb66bca..63a56d0407 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -600,13 +600,13 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
     /* Instruction counter expired.  */
     assert(use_icount);
 #ifndef CONFIG_USER_ONLY
-    if (cpu->icount_extra) {
-        /* Refill decrementer and continue execution.  */
-        cpu->icount_extra += insns_left;
-        insns_left = MIN(0xffff, cpu->icount_extra);
-        cpu->icount_extra -= insns_left;
-        cpu->icount_decr.u16.low = insns_left;
-    } else {
+    /* Ensure global icount has gone forward */
+    cpu_update_icount(cpu);
+    /* Refill decrementer and continue execution.  */
+    insns_left = MIN(0xffff, cpu->icount_budget);
+    cpu->icount_decr.u16.low = insns_left;
+    cpu->icount_extra = cpu->icount_budget - insns_left;
+    if (!cpu->icount_extra) {
         /* Execute any remaining instructions, then let the main loop
          * handle the next event.
          */
diff --git a/cpus.c b/cpus.c
index 167d9615e1..740b8dc3f8 100644
--- a/cpus.c
+++ b/cpus.c
@@ -202,14 +202,14 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
             } else if (use_icount) {
                 error_setg(errp, "No MTTCG when icount is enabled");
             } else {
-#ifndef TARGET_SUPPORT_MTTCG
+#ifndef TARGET_SUPPORTS_MTTCG
                 error_report("Guest not yet converted to MTTCG - "
                              "you may get unexpected results");
 #endif
                 if (!check_tcg_memory_orders_compatible()) {
                     error_report("Guest expects a stronger memory ordering "
                                  "than the host provides");
-                    error_printf("This may cause strange/hard to debug errors");
+                    error_printf("This may cause strange/hard to debug errors\n");
                 }
                 mttcg_enabled = true;
             }
@@ -223,20 +223,51 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
     }
 }
 
+/* The current number of executed instructions is based on what we
+ * originally budgeted minus the current state of the decrementing
+ * icount counters in extra/u16.low.
+ */
+static int64_t cpu_get_icount_executed(CPUState *cpu)
+{
+    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
+}
+
+/*
+ * Update the global shared timer_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
+    int64_t executed = cpu_get_icount_executed(cpu);
+    cpu->icount_budget -= executed;
+
+#ifdef CONFIG_ATOMIC64
+    atomic_set__nocheck(&timers_state.qemu_icount,
+                        atomic_read__nocheck(&timers_state.qemu_icount) +
+                        executed);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    timers_state.qemu_icount += executed;
+#endif
+}
+
 int64_t cpu_get_icount_raw(void)
 {
-    int64_t icount;
     CPUState *cpu = current_cpu;
 
-    icount = timers_state.qemu_icount;
-    if (cpu) {
+    if (cpu && cpu->running) {
         if (!cpu->can_do_io) {
             fprintf(stderr, "Bad icount read\n");
             exit(1);
         }
-        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
+        /* Take into account what has run */
+        cpu_update_icount(cpu);
     }
-    return icount;
+#ifdef CONFIG_ATOMIC64
+    return atomic_read__nocheck(&timers_state.qemu_icount);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    return timers_state.qemu_icount;
+#endif
 }
 
 /* Return the virtual CPU time, based on the instruction counter.  */
@@ -1179,6 +1210,41 @@ static void handle_icount_deadline(void)
     }
 }
 
+static void prepare_icount_for_run(CPUState *cpu)
+{
+    if (use_icount) {
+        int insns_left;
+
+        /* These should always be cleared by process_icount_data after
+         * each vCPU execution. However u16.high can be raised
+         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+         */
+        g_assert(cpu->icount_decr.u16.low == 0);
+        g_assert(cpu->icount_extra == 0);
+
+        cpu->icount_budget = tcg_get_icount_limit();
+        insns_left = MIN(0xffff, cpu->icount_budget);
+        cpu->icount_decr.u16.low = insns_left;
+        cpu->icount_extra = cpu->icount_budget - insns_left;
+    }
+}
+
+static void process_icount_data(CPUState *cpu)
+{
+    if (use_icount) {
+        /* Account for executed instructions */
+        cpu_update_icount(cpu);
+
+        /* Reset the counters */
+        cpu->icount_decr.u16.low = 0;
+        cpu->icount_extra = 0;
+        cpu->icount_budget = 0;
+
+        replay_account_executed_instructions();
+    }
+}
+
+
 static int tcg_cpu_exec(CPUState *cpu)
 {
     int ret;
@@ -1189,20 +1255,6 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
     ti = profile_getclock();
 #endif
-    if (use_icount) {
-        int64_t count;
-        int decr;
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                                    + cpu->icount_extra);
-        cpu->icount_decr.u16.low = 0;
-        cpu->icount_extra = 0;
-        count = tcg_get_icount_limit();
-        timers_state.qemu_icount += count;
-        decr = (count > 0xffff) ? 0xffff : count;
-        count -= decr;
-        cpu->icount_decr.u16.low = decr;
-        cpu->icount_extra = count;
-    }
     qemu_mutex_unlock_iothread();
     cpu_exec_start(cpu);
     ret = cpu_exec(cpu);
@@ -1211,15 +1263,6 @@ static int tcg_cpu_exec(CPUState *cpu)
 #ifdef CONFIG_PROFILER
     tcg_time += profile_getclock() - ti;
 #endif
-    if (use_icount) {
-        /* Fold pending instructions back into the
-           instruction counter, and clear the interrupt flag.  */
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                        + cpu->icount_extra);
-        cpu->icount_decr.u32 = 0;
-        cpu->icount_extra = 0;
-        replay_account_executed_instructions();
-    }
     return ret;
 }
 
@@ -1306,7 +1349,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
 
             if (cpu_can_run(cpu)) {
                 int r;
+
+                prepare_icount_for_run(cpu);
+
                 r = tcg_cpu_exec(cpu);
+
+                process_icount_data(cpu);
+
                 if (r == EXCP_DEBUG) {
                     cpu_handle_guest_debug(cpu);
                     break;
@@ -1392,6 +1441,8 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
     CPUState *cpu = arg;
 
+    g_assert(!use_icount);
+
     rcu_register_thread();
 
     qemu_mutex_lock_iothread();
@@ -1434,8 +1485,6 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
             }
         }
 
-        handle_icount_deadline();
-
         atomic_mb_set(&cpu->exit_request, 0);
         qemu_tcg_wait_io_event(cpu);
     }
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index 4530f8241c..d5a31bbaeb 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -473,10 +473,10 @@ qcrypto_block_luks_load_key(QCryptoBlock *block,
      * then encrypted.
      */
     rv = readfunc(block,
+                  opaque,
                   slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                   splitkey, splitkeylen,
-                  errp,
-                  opaque);
+                  errp);
     if (rv < 0) {
         goto cleanup;
     }
@@ -676,11 +676,10 @@ qcrypto_block_luks_open(QCryptoBlock *block,
 
     /* Read the entire LUKS header, minus the key material from
      * the underlying device */
-    rv = readfunc(block, 0,
+    rv = readfunc(block, opaque, 0,
                   (uint8_t *)&luks->header,
                   sizeof(luks->header),
-                  errp,
-                  opaque);
+                  errp);
     if (rv < 0) {
         ret = rv;
         goto fail;
@@ -1246,7 +1245,7 @@ qcrypto_block_luks_create(QCryptoBlock *block,
         QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
 
     /* Reserve header space to match payload offset */
-    initfunc(block, block->payload_offset, &local_err, opaque);
+    initfunc(block, opaque, block->payload_offset, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
@@ -1268,11 +1267,10 @@ qcrypto_block_luks_create(QCryptoBlock *block,
 
 
     /* Write out the partition header and key slot headers */
-    writefunc(block, 0,
+    writefunc(block, opaque, 0,
               (const uint8_t *)&luks->header,
               sizeof(luks->header),
-              &local_err,
-              opaque);
+              &local_err);
 
     /* Delay checking local_err until we've byte-swapped */
 
@@ -1297,12 +1295,11 @@ qcrypto_block_luks_create(QCryptoBlock *block,
 
     /* Write out the master key material, starting at the
      * sector immediately following the partition header. */
-    if (writefunc(block,
+    if (writefunc(block, opaque,
                   luks->header.key_slots[0].key_offset *
                   QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
                   splitkey, splitkeylen,
-                  errp,
-                  opaque) != splitkeylen) {
+                  errp) != splitkeylen) {
         goto error;
     }
 
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 1e3bd2b8ca..78d7af03a2 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -29,6 +29,7 @@ CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_ALLWINNER_EMAC=y
 CONFIG_IMX_FEC=y
+CONFIG_FTGMAC100=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 09c1d45633..1f1cd85b12 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,6 +45,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For PReP
 CONFIG_SERIAL_ISA=y
 CONFIG_MC146818RTC=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 05c83356e1..f6ccb1bd86 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -47,6 +47,7 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
 CONFIG_PLATFORM_BUS=y
 CONFIG_ETSEC=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
diff --git a/default-configs/ppcemb-softmmu.mak b/default-configs/ppcemb-softmmu.mak
index 7f56004cda..94340de356 100644
--- a/default-configs/ppcemb-softmmu.mak
+++ b/default-configs/ppcemb-softmmu.mak
@@ -15,3 +15,4 @@ CONFIG_I8259=y
 CONFIG_XILINX=y
 CONFIG_XILINX_ETHLITE=y
 CONFIG_LIBDECNUMBER=y
+CONFIG_SM501=y
diff --git a/disas/cris.c b/disas/cris.c
index 30217f17f9..2dd56deea4 100644
--- a/disas/cris.c
+++ b/disas/cris.c
@@ -2048,7 +2048,7 @@ print_with_operands (const struct cris_opcode *opcodep,
 	  {
 	    /* We're looking at [pc+], i.e. we need to output an immediate
 	       number, where the size can depend on different things.  */
-	    long number;
+	    int32_t number;
 	    int signedp
 	      = ((*cs == 'z' && (insn & 0x20))
 		 || opcodep->match == BDAP_QUICK_OPCODE);
@@ -2290,7 +2290,7 @@ print_with_operands (const struct cris_opcode *opcodep,
 
 		    if ((prefix_insn & 0x400) && (prefix_insn & 15) == 15)
 		      {
-			long number;
+			int32_t number;
 			unsigned int nbytes;
 
 			/* It's a value.  Get its size.  */
diff --git a/disas/microblaze.c b/disas/microblaze.c
index 407c0a3ffa..7795a0bdb9 100644
--- a/disas/microblaze.c
+++ b/disas/microblaze.c
@@ -159,7 +159,7 @@ enum microblaze_instr_type {
 #define MIN_PVR_REGNUM 0
 #define MAX_PVR_REGNUM 15
 
-#define REG_PC  32 /* PC */
+/* 32 is REG_PC */
 #define REG_MSR 33 /* machine status reg */
 #define REG_EAR 35 /* Exception reg */
 #define REG_ESR 37 /* Exception reg */
diff --git a/docs/tracing.txt b/docs/tracing.txt
index e14bb6dccc..8c0029beca 100644
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -405,6 +405,9 @@ information. If used together with the "tcg" property, it adds a second
 "TCGv_env" argument that must point to the per-target global TCG register that
 points to the vCPU when guest code is executed (usually the "cpu_env" variable).
 
+The "tcg" and "vcpu" properties are currently only honored in the root
+./trace-events file.
+
 The following example events:
 
     foo(uint32_t a) "a=%x"
diff --git a/exec.c b/exec.c
index e57a8a2178..eac6085760 100644
--- a/exec.c
+++ b/exec.c
@@ -223,6 +223,12 @@ struct CPUAddressSpace {
     MemoryListener tcg_as_listener;
 };
 
+struct DirtyBitmapSnapshot {
+    ram_addr_t start;
+    ram_addr_t end;
+    unsigned long dirty[];
+};
+
 #endif
 
 #if !defined(CONFIG_USER_ONLY)
@@ -1061,6 +1067,75 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
     return dirty;
 }
 
+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+     (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+    ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+    ram_addr_t last  = QEMU_ALIGN_UP(start + length, align);
+    DirtyBitmapSnapshot *snap;
+    unsigned long page, end, dest;
+
+    snap = g_malloc0(sizeof(*snap) +
+                     ((last - first) >> (TARGET_PAGE_BITS + 3)));
+    snap->start = first;
+    snap->end   = last;
+
+    page = first >> TARGET_PAGE_BITS;
+    end  = last  >> TARGET_PAGE_BITS;
+    dest = 0;
+
+    rcu_read_lock();
+
+    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    while (page < end) {
+        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+        assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+        assert(QEMU_IS_ALIGNED(num,    (1 << BITS_PER_LEVEL)));
+        offset >>= BITS_PER_LEVEL;
+
+        bitmap_copy_and_clear_atomic(snap->dirty + dest,
+                                     blocks->blocks[idx] + offset,
+                                     num);
+        page += num;
+        dest += num >> BITS_PER_LEVEL;
+    }
+
+    rcu_read_unlock();
+
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+
+    return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length)
+{
+    unsigned long page, end;
+
+    assert(start >= snap->start);
+    assert(start + length <= snap->end);
+
+    end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+    page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+    while (page < end) {
+        if (test_bit(page, snap->dirty)) {
+            return true;
+        }
+        page++;
+    }
+    return false;
+}
+
 /* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
@@ -1528,7 +1603,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
     return offset;
 }
 
-ram_addr_t last_ram_offset(void)
+unsigned long last_ram_page(void)
 {
     RAMBlock *block;
     ram_addr_t last = 0;
@@ -1538,7 +1613,7 @@ ram_addr_t last_ram_offset(void)
         last = MAX(last, block->offset + block->max_length);
     }
     rcu_read_unlock();
-    return last;
+    return last >> TARGET_PAGE_BITS;
 }
 
 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
@@ -1727,7 +1802,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     ram_addr_t old_ram_size, new_ram_size;
     Error *err = NULL;
 
-    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+    old_ram_size = last_ram_page();
 
     qemu_mutex_lock_ramlist();
     new_block->offset = find_ram_offset(new_block->max_length);
@@ -1758,7 +1833,6 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     new_ram_size = MAX(old_ram_size,
               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
     if (new_ram_size > old_ram_size) {
-        migration_bitmap_extend(old_ram_size, new_ram_size);
         dirty_memory_extend(old_ram_size, new_ram_size);
     }
     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
@@ -3236,75 +3310,33 @@ int64_t address_space_cache_init(MemoryRegionCache *cache,
                                  hwaddr len,
                                  bool is_write)
 {
-    hwaddr l, xlat;
-    MemoryRegion *mr;
-    void *ptr;
-
-    assert(len > 0);
-
-    l = len;
-    mr = address_space_translate(as, addr, &xlat, &l, is_write);
-    if (!memory_access_is_direct(mr, is_write)) {
-        return -EINVAL;
-    }
-
-    l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
-    ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
-
-    cache->xlat = xlat;
-    cache->is_write = is_write;
-    cache->mr = mr;
-    cache->ptr = ptr;
-    cache->len = l;
-    memory_region_ref(cache->mr);
-
-    return l;
+    cache->len = len;
+    cache->as = as;
+    cache->xlat = addr;
+    return len;
 }
 
 void address_space_cache_invalidate(MemoryRegionCache *cache,
                                     hwaddr addr,
                                     hwaddr access_len)
 {
-    assert(cache->is_write);
-    invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
 }
 
 void address_space_cache_destroy(MemoryRegionCache *cache)
 {
-    if (!cache->mr) {
-        return;
-    }
-
-    if (xen_enabled()) {
-        xen_invalidate_map_cache_entry(cache->ptr);
-    }
-    memory_region_unref(cache->mr);
-    cache->mr = NULL;
-}
-
-/* Called from RCU critical section.  This function has the same
- * semantics as address_space_translate, but it only works on a
- * predefined range of a MemoryRegion that was mapped with
- * address_space_cache_init.
- */
-static inline MemoryRegion *address_space_translate_cached(
-    MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
-    hwaddr *plen, bool is_write)
-{
-    assert(addr < cache->len && *plen <= cache->len - addr);
-    *xlat = addr + cache->xlat;
-    return cache->mr;
+    cache->as = NULL;
 }
 
 #define ARG1_DECL                MemoryRegionCache *cache
 #define ARG1                     cache
 #define SUFFIX                   _cached
-#define TRANSLATE(...)           address_space_translate_cached(cache, __VA_ARGS__)
+#define TRANSLATE(addr, ...)     \
+    address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__)
 #define IS_DIRECT(mr, is_write)  true
-#define MAP_RAM(mr, ofs)         (cache->ptr + (ofs - cache->xlat))
-#define INVALIDATE(mr, ofs, len) ((void)0)
-#define RCU_READ_LOCK()          ((void)0)
-#define RCU_READ_UNLOCK()        ((void)0)
+#define MAP_RAM(mr, ofs)         qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK()          rcu_read_lock()
+#define RCU_READ_UNLOCK()        rcu_read_unlock()
 #include "memory_ldst.inc.c"
 
 /* virtual memory access for debug (includes writing to ROM) */
@@ -3349,9 +3381,9 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
  * Allows code that needs to deal with migration bitmaps etc to still be built
  * target independent.
  */
-size_t qemu_target_page_bits(void)
+size_t qemu_target_page_size(void)
 {
-    return TARGET_PAGE_BITS;
+    return TARGET_PAGE_SIZE;
 }
 
 #endif
diff --git a/hmp.c b/hmp.c
index edb8970461..ab407d6fd0 100644
--- a/hmp.c
+++ b/hmp.c
@@ -215,6 +215,9 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                        info->ram->normal_bytes >> 10);
         monitor_printf(mon, "dirty sync count: %" PRIu64 "\n",
                        info->ram->dirty_sync_count);
+        monitor_printf(mon, "page size: %" PRIu64 " kbytes\n",
+                       info->ram->page_size >> 10);
+
         if (info->ram->dirty_pages_rate) {
             monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
                            info->ram->dirty_pages_rate);
@@ -265,13 +268,11 @@ void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict)
     caps = qmp_query_migrate_capabilities(NULL);
 
     if (caps) {
-        monitor_printf(mon, "capabilities: ");
         for (cap = caps; cap; cap = cap->next) {
-            monitor_printf(mon, "%s: %s ",
+            monitor_printf(mon, "%s: %s\n",
                            MigrationCapability_lookup[cap->value->capability],
                            cap->value->state ? "on" : "off");
         }
-        monitor_printf(mon, "\n");
     }
 
     qapi_free_MigrationCapabilityStatusList(caps);
@@ -284,46 +285,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
     params = qmp_query_migrate_parameters(NULL);
 
     if (params) {
-        monitor_printf(mon, "parameters:");
         assert(params->has_compress_level);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL],
             params->compress_level);
         assert(params->has_compress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS],
             params->compress_threads);
         assert(params->has_decompress_threads);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS],
             params->decompress_threads);
         assert(params->has_cpu_throttle_initial);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL],
             params->cpu_throttle_initial);
         assert(params->has_cpu_throttle_increment);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT],
             params->cpu_throttle_increment);
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS],
             params->has_tls_creds ? params->tls_creds : "");
-        monitor_printf(mon, " %s: '%s'",
+        monitor_printf(mon, "%s: '%s'\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME],
             params->has_tls_hostname ? params->tls_hostname : "");
         assert(params->has_max_bandwidth);
-        monitor_printf(mon, " %s: %" PRId64 " bytes/second",
+        monitor_printf(mon, "%s: %" PRId64 " bytes/second\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH],
             params->max_bandwidth);
         assert(params->has_downtime_limit);
-        monitor_printf(mon, " %s: %" PRId64 " milliseconds",
+        monitor_printf(mon, "%s: %" PRId64 " milliseconds\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT],
             params->downtime_limit);
         assert(params->has_x_checkpoint_delay);
-        monitor_printf(mon, " %s: %" PRId64,
+        monitor_printf(mon, "%s: %" PRId64 "\n",
             MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY],
             params->x_checkpoint_delay);
-        monitor_printf(mon, "\n");
     }
 
     qapi_free_MigrationParameters(params);
diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
index 45e9a1f9b0..f3ebca4f7a 100644
--- a/hw/9pfs/9p-local.c
+++ b/hw/9pfs/9p-local.c
@@ -1098,8 +1098,13 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path,
 {
     if (dir_path) {
         v9fs_path_sprintf(target, "%s/%s", dir_path->data, name);
-    } else {
+    } else if (strcmp(name, "/")) {
         v9fs_path_sprintf(target, "%s", name);
+    } else {
+        /* We want the path of the export root to be relative, otherwise
+         * "*at()" syscalls would treat it as "/" in the host.
+         */
+        v9fs_path_sprintf(target, "%s", ".");
     }
     return 0;
 }
diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c
index eec160b3c2..d05c1a1c1d 100644
--- a/hw/9pfs/9p-xattr.c
+++ b/hw/9pfs/9p-xattr.c
@@ -108,6 +108,7 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path,
     g_free(name);
     close_preserve_errno(dirfd);
     if (xattr_len < 0) {
+        g_free(orig_value);
         return -1;
     }
 
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index b8c0b99358..c80ba67389 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -539,14 +539,15 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
 
     /* Free all fids */
     while (s->fid_list) {
+        /* Get fid */
         fidp = s->fid_list;
+        fidp->ref++;
+
+        /* Clunk fid */
         s->fid_list = fidp->next;
+        fidp->clunked = 1;
 
-        if (fidp->ref) {
-            fidp->clunked = 1;
-        } else {
-            free_fid(pdu, fidp);
-        }
+        put_fid(pdu, fidp);
     }
 }
 
@@ -1550,6 +1551,10 @@ static void coroutine_fn v9fs_lcreate(void *opaque)
         err = -ENOENT;
         goto out_nofid;
     }
+    if (fidp->fid_type != P9_FID_NONE) {
+        err = -EINVAL;
+        goto out;
+    }
 
     flags = get_dotl_openflags(pdu->s, flags);
     err = v9fs_co_open2(pdu, fidp, &name, gid,
@@ -2153,6 +2158,10 @@ static void coroutine_fn v9fs_create(void *opaque)
         err = -EINVAL;
         goto out_nofid;
     }
+    if (fidp->fid_type != P9_FID_NONE) {
+        err = -EINVAL;
+        goto out;
+    }
     if (perm & P9_STAT_MODE_DIR) {
         err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
                             fidp->uid, -1, &stbuf);
@@ -2379,8 +2388,10 @@ static void coroutine_fn v9fs_flush(void *opaque)
          * Wait for pdu to complete.
          */
         qemu_co_queue_wait(&cancel_pdu->complete, NULL);
-        cancel_pdu->cancelled = 0;
-        pdu_free(cancel_pdu);
+        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
+            cancel_pdu->cancelled = 0;
+            pdu_free(cancel_pdu);
+        }
     }
     pdu_complete(pdu, 7);
 }
diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c
index b4adac88cd..05b9d7ba36 100644
--- a/hw/acpi/tco.c
+++ b/hw/acpi/tco.c
@@ -75,8 +75,6 @@ static void tco_timer_expired(void *opaque)
 
     if (pm->smi_en & ICH9_PMIO_SMI_EN_TCO_EN) {
         ich9_generate_smi();
-    } else {
-        ich9_generate_nmi();
     }
     tr->tco.rld = tr->tco.tmr;
     tco_timer_reload(tr);
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index 7a3ad17d66..a32b847fe0 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -205,9 +205,30 @@ static void vmgenid_handle_reset(void *opaque)
     memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le));
 }
 
+static Property vmgenid_properties[] = {
+    DEFINE_PROP_BOOL("x-write-pointer-available", VmGenIdState,
+                     write_pointer_available, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void vmgenid_realize(DeviceState *dev, Error **errp)
 {
     VmGenIdState *vms = VMGENID(dev);
+
+    if (!vms->write_pointer_available) {
+        error_setg(errp, "%s requires DMA write support in fw_cfg, "
+                   "which this machine type does not provide", VMGENID_DEVICE);
+        return;
+    }
+
+    /* Given that this function is executing, there is at least one VMGENID
+     * device. Check if there are several.
+     */
+    if (!find_vmgenid_dev()) {
+        error_setg(errp, "at most one %s device is permitted", VMGENID_DEVICE);
+        return;
+    }
+
     qemu_register_reset(vmgenid_handle_reset, vms);
 }
 
@@ -218,6 +239,7 @@ static void vmgenid_device_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_vmgenid;
     dc->realize = vmgenid_realize;
     dc->hotpluggable = false;
+    dc->props = vmgenid_properties;
 
     object_class_property_add_str(klass, VMGENID_GUID, NULL,
                                   vmgenid_set_guid, NULL);
diff --git a/hw/arm/allwinner-a10.c b/hw/arm/allwinner-a10.c
index ca15d1c8cc..f62a9a3541 100644
--- a/hw/arm/allwinner-a10.c
+++ b/hw/arm/allwinner-a10.c
@@ -118,12 +118,6 @@ static void aw_a10_class_init(ObjectClass *oc, void *data)
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = aw_a10_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo aw_a10_type_info = {
diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
index 571e4f097b..4937e2bc83 100644
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -19,6 +19,7 @@
 #include "hw/char/serial.h"
 #include "qemu/log.h"
 #include "hw/i2c/aspeed_i2c.h"
+#include "net/net.h"
 
 #define ASPEED_SOC_UART_5_BASE      0x00184000
 #define ASPEED_SOC_IOMEM_SIZE       0x00200000
@@ -33,6 +34,8 @@
 #define ASPEED_SOC_TIMER_BASE       0x1E782000
 #define ASPEED_SOC_WDT_BASE         0x1E785000
 #define ASPEED_SOC_I2C_BASE         0x1E78A000
+#define ASPEED_SOC_ETH1_BASE        0x1E660000
+#define ASPEED_SOC_ETH2_BASE        0x1E680000
 
 static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
 static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, };
@@ -175,6 +178,10 @@ static void aspeed_soc_init(Object *obj)
     object_initialize(&s->wdt, sizeof(s->wdt), TYPE_ASPEED_WDT);
     object_property_add_child(obj, "wdt", OBJECT(&s->wdt), NULL);
     qdev_set_parent_bus(DEVICE(&s->wdt), sysbus_get_default());
+
+    object_initialize(&s->ftgmac100, sizeof(s->ftgmac100), TYPE_FTGMAC100);
+    object_property_add_child(obj, "ftgmac100", OBJECT(&s->ftgmac100), NULL);
+    qdev_set_parent_bus(DEVICE(&s->ftgmac100), sysbus_get_default());
 }
 
 static void aspeed_soc_realize(DeviceState *dev, Error **errp)
@@ -299,6 +306,20 @@ static void aspeed_soc_realize(DeviceState *dev, Error **errp)
         return;
     }
     sysbus_mmio_map(SYS_BUS_DEVICE(&s->wdt), 0, ASPEED_SOC_WDT_BASE);
+
+    /* Net */
+    qdev_set_nic_properties(DEVICE(&s->ftgmac100), &nd_table[0]);
+    object_property_set_bool(OBJECT(&s->ftgmac100), true, "aspeed", &err);
+    object_property_set_bool(OBJECT(&s->ftgmac100), true, "realized",
+                             &local_err);
+    error_propagate(&err, local_err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    sysbus_mmio_map(SYS_BUS_DEVICE(&s->ftgmac100), 0, ASPEED_SOC_ETH1_BASE);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100), 0,
+                       qdev_get_gpio_in(DEVICE(&s->vic), 2));
 }
 
 static void aspeed_soc_class_init(ObjectClass *oc, void *data)
diff --git a/hw/arm/bcm2836.c b/hw/arm/bcm2836.c
index 8451190a19..8c43291112 100644
--- a/hw/arm/bcm2836.c
+++ b/hw/arm/bcm2836.c
@@ -160,12 +160,6 @@ static void bcm2836_class_init(ObjectClass *oc, void *data)
 
     dc->props = bcm2836_props;
     dc->realize = bcm2836_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo bcm2836_type_info = {
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index ff621e4b6a..c2720c8046 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -31,6 +31,9 @@
 #define KERNEL_LOAD_ADDR 0x00010000
 #define KERNEL64_LOAD_ADDR 0x00080000
 
+#define ARM64_TEXT_OFFSET_OFFSET    8
+#define ARM64_MAGIC_OFFSET          56
+
 typedef enum {
     FIXUP_NONE = 0,     /* do nothing */
     FIXUP_TERMINATOR,   /* end of insns */
@@ -768,6 +771,49 @@ static uint64_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
     return ret;
 }
 
+static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
+                                   hwaddr *entry)
+{
+    hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR;
+    uint8_t *buffer;
+    int size;
+
+    /* On aarch64, it's the bootloader's job to uncompress the kernel. */
+    size = load_image_gzipped_buffer(filename, LOAD_IMAGE_MAX_GUNZIP_BYTES,
+                                     &buffer);
+
+    if (size < 0) {
+        gsize len;
+
+        /* Load as raw file otherwise */
+        if (!g_file_get_contents(filename, (char **)&buffer, &len, NULL)) {
+            return -1;
+        }
+        size = len;
+    }
+
+    /* check the arm64 magic header value -- very old kernels may not have it */
+    if (memcmp(buffer + ARM64_MAGIC_OFFSET, "ARM\x64", 4) == 0) {
+        uint64_t hdrvals[2];
+
+        /* The arm64 Image header has text_offset and image_size fields at 8 and
+         * 16 bytes into the Image header, respectively. The text_offset field
+         * is only valid if the image_size is non-zero.
+         */
+        memcpy(&hdrvals, buffer + ARM64_TEXT_OFFSET_OFFSET, sizeof(hdrvals));
+        if (hdrvals[1] != 0) {
+            kernel_load_offset = le64_to_cpu(hdrvals[0]);
+        }
+    }
+
+    *entry = mem_base + kernel_load_offset;
+    rom_add_blob_fixed(filename, buffer, size, *entry);
+
+    g_free(buffer);
+
+    return size;
+}
+
 static void arm_load_kernel_notify(Notifier *notifier, void *data)
 {
     CPUState *cs;
@@ -776,7 +822,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
     int is_linux = 0;
     uint64_t elf_entry, elf_low_addr, elf_high_addr;
     int elf_machine;
-    hwaddr entry, kernel_load_offset;
+    hwaddr entry;
     static const ARMInsnFixup *primary_loader;
     ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
                                          notifier, notifier);
@@ -841,14 +887,12 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
 
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         primary_loader = bootloader_aarch64;
-        kernel_load_offset = KERNEL64_LOAD_ADDR;
         elf_machine = EM_AARCH64;
     } else {
         primary_loader = bootloader;
         if (!info->write_board_setup) {
             primary_loader += BOOTLOADER_NO_BOARD_SETUP_OFFSET;
         }
-        kernel_load_offset = KERNEL_LOAD_ADDR;
         elf_machine = EM_ARM;
     }
 
@@ -900,17 +944,15 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
         kernel_size = load_uimage(info->kernel_filename, &entry, NULL,
                                   &is_linux, NULL, NULL);
     }
-    /* On aarch64, it's the bootloader's job to uncompress the kernel. */
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) {
-        entry = info->loader_start + kernel_load_offset;
-        kernel_size = load_image_gzipped(info->kernel_filename, entry,
-                                         info->ram_size - kernel_load_offset);
+        kernel_size = load_aarch64_image(info->kernel_filename,
+                                         info->loader_start, &entry);
         is_linux = 1;
-    }
-    if (kernel_size < 0) {
-        entry = info->loader_start + kernel_load_offset;
+    } else if (kernel_size < 0) {
+        /* 32-bit ARM */
+        entry = info->loader_start + KERNEL_LOAD_ADDR;
         kernel_size = load_image_targphys(info->kernel_filename, entry,
-                                          info->ram_size - kernel_load_offset);
+                                          info->ram_size - KERNEL_LOAD_ADDR);
         is_linux = 1;
     }
     if (kernel_size < 0) {
diff --git a/hw/arm/digic.c b/hw/arm/digic.c
index d60ea395f4..94f32637f0 100644
--- a/hw/arm/digic.c
+++ b/hw/arm/digic.c
@@ -101,12 +101,6 @@ static void digic_class_init(ObjectClass *oc, void *data)
     DeviceClass *dc = DEVICE_CLASS(oc);
 
     dc->realize = digic_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo digic_type_info = {
diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c
index 1d2b50cc4e..960f27e45a 100644
--- a/hw/arm/exynos4210.c
+++ b/hw/arm/exynos4210.c
@@ -32,6 +32,7 @@
 #include "hw/arm/arm.h"
 #include "hw/loader.h"
 #include "hw/arm/exynos4210.h"
+#include "hw/sd/sd.h"
 #include "hw/usb/hcd-ehci.h"
 
 #define EXYNOS4210_CHIPID_ADDR         0x10000000
@@ -72,6 +73,13 @@
 #define EXYNOS4210_EXT_COMBINER_BASE_ADDR   0x10440000
 #define EXYNOS4210_INT_COMBINER_BASE_ADDR   0x10448000
 
+/* SD/MMC host controllers */
+#define EXYNOS4210_SDHCI_CAPABILITIES       0x05E80080
+#define EXYNOS4210_SDHCI_BASE_ADDR          0x12510000
+#define EXYNOS4210_SDHCI_ADDR(n)            (EXYNOS4210_SDHCI_BASE_ADDR + \
+                                                0x00010000 * (n))
+#define EXYNOS4210_SDHCI_NUMBER             4
+
 /* PMU SFR base address */
 #define EXYNOS4210_PMU_BASE_ADDR            0x10020000
 
@@ -382,6 +390,27 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
                            EXYNOS4210_UART3_FIFO_SIZE, 3, NULL,
                   s->irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 3)]);
 
+    /*** SD/MMC host controllers ***/
+    for (n = 0; n < EXYNOS4210_SDHCI_NUMBER; n++) {
+        DeviceState *carddev;
+        BlockBackend *blk;
+        DriveInfo *di;
+
+        dev = qdev_create(NULL, "generic-sdhci");
+        qdev_prop_set_uint32(dev, "capareg", EXYNOS4210_SDHCI_CAPABILITIES);
+        qdev_init_nofail(dev);
+
+        busdev = SYS_BUS_DEVICE(dev);
+        sysbus_mmio_map(busdev, 0, EXYNOS4210_SDHCI_ADDR(n));
+        sysbus_connect_irq(busdev, 0, s->irq_table[exynos4210_get_irq(29, n)]);
+
+        di = drive_get(IF_SD, 0, n);
+        blk = di ? blk_by_legacy_dinfo(di) : NULL;
+        carddev = qdev_create(qdev_get_child_bus(dev, "sd-bus"), TYPE_SD_CARD);
+        qdev_prop_set_drive(carddev, "drive", blk, &error_abort);
+        qdev_init_nofail(carddev);
+    }
+
     /*** Display controller (FIMD) ***/
     sysbus_create_varargs("exynos4210.fimd", EXYNOS4210_FIMD0_BASE_ADDR,
             s->irq_table[exynos4210_get_irq(11, 0)],
diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index 0efa194054..4853c31802 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -22,6 +22,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 #include "qemu-common.h"
 #include "cpu.h"
 #include "sysemu/sysemu.h"
@@ -101,9 +102,9 @@ static Exynos4210State *exynos4_boards_init_common(MachineState *machine,
     MachineClass *mc = MACHINE_GET_CLASS(machine);
 
     if (smp_cpus != EXYNOS4210_NCPUS && !qtest_enabled()) {
-        fprintf(stderr, "%s board supports only %d CPU cores. Ignoring smp_cpus"
-                " value.\n",
-                mc->name, EXYNOS4210_NCPUS);
+        error_report("%s board supports only %d CPU cores, ignoring smp_cpus"
+                     " value",
+                     mc->name, EXYNOS4210_NCPUS);
     }
 
     exynos4_board_binfo.ram_size = exynos4_board_ram_size[board_type];
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 2126f73ca0..9056f27bf8 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -290,11 +290,6 @@ static void fsl_imx25_class_init(ObjectClass *oc, void *data)
 
     dc->realize = fsl_imx25_realize;
 
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
     dc->desc = "i.MX25 SOC";
 }
 
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index dd1c713ae3..d7e2d832b2 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -262,11 +262,6 @@ static void fsl_imx31_class_init(ObjectClass *oc, void *data)
 
     dc->realize = fsl_imx31_realize;
 
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
     dc->desc = "i.MX31 SOC";
 }
 
diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c
index 76dd8a48ca..6969e734ad 100644
--- a/hw/arm/fsl-imx6.c
+++ b/hw/arm/fsl-imx6.c
@@ -442,11 +442,6 @@ static void fsl_imx6_class_init(ObjectClass *oc, void *data)
 
     dc->realize = fsl_imx6_realize;
 
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
     dc->desc = "i.MX6 SOC";
 }
 
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index cfee3929d9..eea551dc16 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -755,19 +755,18 @@ static void pxa2xx_ssp_reset(DeviceState *d)
     s->rx_start = s->rx_level = 0;
 }
 
-static int pxa2xx_ssp_init(SysBusDevice *sbd)
+static void pxa2xx_ssp_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(sbd);
-    PXA2xxSSPState *s = PXA2XX_SSP(dev);
-
+    DeviceState *dev = DEVICE(obj);
+    PXA2xxSSPState *s = PXA2XX_SSP(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
     sysbus_init_irq(sbd, &s->irq);
 
-    memory_region_init_io(&s->iomem, OBJECT(s), &pxa2xx_ssp_ops, s,
+    memory_region_init_io(&s->iomem, obj, &pxa2xx_ssp_ops, s,
                           "pxa2xx-ssp", 0x1000);
     sysbus_init_mmio(sbd, &s->iomem);
 
     s->bus = ssi_create_bus(dev, "ssi");
-    return 0;
 }
 
 /* Real-Time Clock */
@@ -2321,10 +2320,8 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)
 
 static void pxa2xx_ssp_class_init(ObjectClass *klass, void *data)
 {
-    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
 
-    sdc->init = pxa2xx_ssp_init;
     dc->reset = pxa2xx_ssp_reset;
     dc->vmsd = &vmstate_pxa2xx_ssp;
 }
@@ -2333,6 +2330,7 @@ static const TypeInfo pxa2xx_ssp_info = {
     .name          = TYPE_PXA2XX_SSP,
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(PXA2xxSSPState),
+    .instance_init = pxa2xx_ssp_init,
     .class_init    = pxa2xx_ssp_class_init,
 };
 
diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index 9edcd49740..ea7a8094e1 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -108,7 +108,10 @@ static void gptm_reload(gptm_state *s, int n, int reset)
     } else if (s->mode[n] == 0xa) {
         /* PWM mode.  Not implemented.  */
     } else {
-        hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]);
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: 16-bit timer mode unimplemented: 0x%x\n",
+                      s->mode[n]);
+        return;
     }
     s->tick[n] = tick;
     timer_mod(s->timer[n], tick);
@@ -149,7 +152,9 @@ static void gptm_tick(void *opaque)
     } else if (s->mode[n] == 0xa) {
         /* PWM mode.  Not implemented.  */
     } else {
-        hw_error("TODO: 16-bit timer mode 0x%x\n", s->mode[n]);
+        qemu_log_mask(LOG_UNIMP,
+                      "GPTM: 16-bit timer mode unimplemented: 0x%x\n",
+                      s->mode[n]);
     }
     gptm_update_irq(s);
 }
@@ -286,7 +291,8 @@ static void gptm_write(void *opaque, hwaddr offset,
         s->match_prescale[0] = value;
         break;
     default:
-        hw_error("gptm_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "GPTM: read at bad offset 0x%x\n", (int)offset);
     }
     gptm_update_irq(s);
 }
@@ -425,7 +431,10 @@ static int ssys_board_class(const ssys_state *s)
         }
         /* for unknown classes, fall through */
     default:
-        hw_error("ssys_board_class: Unknown class 0x%08x\n", did0);
+        /* This can only happen if the hardwired constant did0 value
+         * in this board's stellaris_board_info struct is wrong.
+         */
+        g_assert_not_reached();
     }
 }
 
@@ -479,8 +488,7 @@ static uint64_t ssys_read(void *opaque, hwaddr offset,
             case DID0_CLASS_SANDSTORM:
                 return pllcfg_sandstorm[xtal];
             default:
-                hw_error("ssys_read: Unhandled class for PLLCFG read.\n");
-                return 0;
+                g_assert_not_reached();
             }
         }
     case 0x070: /* RCC2 */
@@ -512,7 +520,8 @@ static uint64_t ssys_read(void *opaque, hwaddr offset,
     case 0x1e4: /* USER1 */
         return s->user1;
     default:
-        hw_error("ssys_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSYS: read at bad offset 0x%x\n", (int)offset);
         return 0;
     }
 }
@@ -614,7 +623,8 @@ static void ssys_write(void *opaque, hwaddr offset,
         s->ldoarst = value;
         break;
     default:
-        hw_error("ssys_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "SSYS: write at bad offset 0x%x\n", (int)offset);
     }
     ssys_update(s);
 }
@@ -748,7 +758,8 @@ static uint64_t stellaris_i2c_read(void *opaque, hwaddr offset,
     case 0x20: /* MCR */
         return s->mcr;
     default:
-        hw_error("strllaris_i2c_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_i2c: read at bad offset 0x%x\n", (int)offset);
         return 0;
     }
 }
@@ -823,17 +834,18 @@ static void stellaris_i2c_write(void *opaque, hwaddr offset,
         s->mris &= ~value;
         break;
     case 0x20: /* MCR */
-        if (value & 1)
-            hw_error(
-                      "stellaris_i2c_write: Loopback not implemented\n");
-        if (value & 0x20)
-            hw_error(
-                      "stellaris_i2c_write: Slave mode not implemented\n");
+        if (value & 1) {
+            qemu_log_mask(LOG_UNIMP, "stellaris_i2c: Loopback not implemented");
+        }
+        if (value & 0x20) {
+            qemu_log_mask(LOG_UNIMP,
+                          "stellaris_i2c: Slave mode not implemented");
+        }
         s->mcr = value & 0x31;
         break;
     default:
-        hw_error("stellaris_i2c_write: Bad offset 0x%x\n",
-                  (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_i2c: write at bad offset 0x%x\n", (int)offset);
     }
     stellaris_i2c_update(s);
 }
@@ -1057,8 +1069,8 @@ static uint64_t stellaris_adc_read(void *opaque, hwaddr offset,
     case 0x30: /* SAC */
         return s->sac;
     default:
-        hw_error("strllaris_adc_read: Bad offset 0x%x\n",
-                  (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_adc: read at bad offset 0x%x\n", (int)offset);
         return 0;
     }
 }
@@ -1078,8 +1090,9 @@ static void stellaris_adc_write(void *opaque, hwaddr offset,
             return;
         case 0x04: /* SSCTL */
             if (value != 6) {
-                hw_error("ADC: Unimplemented sequence %" PRIx64 "\n",
-                          value);
+                qemu_log_mask(LOG_UNIMP,
+                              "ADC: Unimplemented sequence %" PRIx64 "\n",
+                              value);
             }
             s->ssctl[n] = value;
             return;
@@ -1110,13 +1123,14 @@ static void stellaris_adc_write(void *opaque, hwaddr offset,
         s->sspri = value;
         break;
     case 0x28: /* PSSI */
-        hw_error("Not implemented:  ADC sample initiate\n");
+        qemu_log_mask(LOG_UNIMP, "ADC: sample initiate unimplemented");
         break;
     case 0x30: /* SAC */
         s->sac = value;
         break;
     default:
-        hw_error("stellaris_adc_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "stellaris_adc: write at bad offset 0x%x\n", (int)offset);
     }
     stellaris_adc_update(s);
 }
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index bc4e66b862..64f52f80a5 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -30,6 +30,8 @@
 #define ARM_PHYS_TIMER_PPI  30
 #define ARM_VIRT_TIMER_PPI  27
 
+#define GEM_REVISION        0x40070106
+
 #define GIC_BASE_ADDR       0xf9000000
 #define GIC_DIST_ADDR       0xf9010000
 #define GIC_CPU_ADDR        0xf9020000
@@ -334,8 +336,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
             qemu_check_nic_model(nd, TYPE_CADENCE_GEM);
             qdev_set_nic_properties(DEVICE(&s->gem[i]), nd);
         }
+        object_property_set_int(OBJECT(&s->gem[i]), GEM_REVISION, "revision",
+                                &error_abort);
         object_property_set_int(OBJECT(&s->gem[i]), 2, "num-priority-queues",
-                                  &error_abort);
+                                &error_abort);
         object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err);
         if (err) {
             error_propagate(errp, err);
@@ -439,12 +443,6 @@ static void xlnx_zynqmp_class_init(ObjectClass *oc, void *data)
 
     dc->props = xlnx_zynqmp_props;
     dc->realize = xlnx_zynqmp_realize;
-
-    /*
-     * Reason: creates an ARM CPU, thus use after free(), see
-     * arm_cpu_class_init()
-     */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo xlnx_zynqmp_type_info = {
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index a328693d15..2e629b398b 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2521,8 +2521,8 @@ static void fdctrl_result_timer(void *opaque)
 }
 
 /* Init functions */
-static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp,
-                                  DeviceState *fdc_dev)
+static void fdctrl_connect_drives(FDCtrl *fdctrl, DeviceState *fdc_dev,
+                                  Error **errp)
 {
     unsigned int i;
     FDrive *drive;
@@ -2675,7 +2675,7 @@ static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl,
     }
 
     floppy_bus_create(fdctrl, &fdctrl->bus, dev);
-    fdctrl_connect_drives(fdctrl, errp, dev);
+    fdctrl_connect_drives(fdctrl, dev, errp);
 }
 
 static const MemoryRegionPortio fdc_portio_list[] = {
diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c
index b75f28d473..bff706ab3a 100644
--- a/hw/char/exynos4210_uart.c
+++ b/hw/char/exynos4210_uart.c
@@ -102,7 +102,7 @@ typedef struct Exynos4210UartReg {
     uint32_t            reset_value;
 } Exynos4210UartReg;
 
-static Exynos4210UartReg exynos4210_uart_regs[] = {
+static const Exynos4210UartReg exynos4210_uart_regs[] = {
     {"ULCON",    ULCON,    0x00000000},
     {"UCON",     UCON,     0x00003000},
     {"UFCON",    UFCON,    0x00000000},
@@ -220,7 +220,7 @@ static uint8_t fifo_retrieve(Exynos4210UartFIFO *q)
     return  ret;
 }
 
-static int fifo_elements_number(Exynos4210UartFIFO *q)
+static int fifo_elements_number(const Exynos4210UartFIFO *q)
 {
     if (q->sp < q->rp) {
         return q->size - q->rp + q->sp;
@@ -229,7 +229,7 @@ static int fifo_elements_number(Exynos4210UartFIFO *q)
     return q->sp - q->rp;
 }
 
-static int fifo_empty_elements_number(Exynos4210UartFIFO *q)
+static int fifo_empty_elements_number(const Exynos4210UartFIFO *q)
 {
     return q->size - fifo_elements_number(q);
 }
@@ -245,7 +245,7 @@ static void fifo_reset(Exynos4210UartFIFO *q)
     q->rp = 0;
 }
 
-static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(Exynos4210UartState *s)
+static uint32_t exynos4210_uart_Tx_FIFO_trigger_level(const Exynos4210UartState *s)
 {
     uint32_t level = 0;
     uint32_t reg;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0d92672203..ada9eea483 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -396,6 +396,11 @@ static void machine_class_init(ObjectClass *oc, void *data)
     mc->default_ram_size = 128 * M_BYTE;
     mc->rom_file_has_mr = true;
 
+    /* numa node memory size aligned on 8MB by default.
+     * On Linux, each node's border has to be 8MB aligned
+     */
+    mc->numa_mem_align_shift = 23;
+
     object_class_property_add_str(oc, "accel",
         machine_get_accel, machine_set_accel, &error_abort);
     object_class_property_set_description(oc, "accel",
diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c
index 27c8369b57..864832db34 100644
--- a/hw/core/null-machine.c
+++ b/hw/core/null-machine.c
@@ -40,6 +40,12 @@ static void machine_none_init(MachineState *mch)
         memory_region_allocate_system_memory(ram, NULL, "ram", mch->ram_size);
         memory_region_add_subregion(get_system_memory(), 0, ram);
     }
+
+    if (mch->kernel_filename) {
+        error_report("The -kernel parameter is not supported "
+                     "(use the generic 'loader' device instead).");
+        exit(1);
+    }
 }
 
 static void machine_none_machine_init(MachineClass *mc)
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index c34be1c1ba..79c2014135 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -124,8 +124,12 @@ static void release_drive(Object *obj, const char *name, void *opaque)
     BlockBackend **ptr = qdev_get_prop_ptr(dev, prop);
 
     if (*ptr) {
+        AioContext *ctx = blk_get_aio_context(*ptr);
+
+        aio_context_acquire(ctx);
         blockdev_auto_del(*ptr);
         blk_detach_dev(*ptr, dev);
+        aio_context_release(ctx);
     }
 }
 
@@ -405,7 +409,7 @@ void qdev_prop_set_drive(DeviceState *dev, const char *name,
     if (value) {
         ref = blk_name(value);
         if (!*ref) {
-            BlockDriverState *bs = blk_bs(value);
+            const BlockDriverState *bs = blk_bs(value);
             if (bs) {
                 ref = bdrv_get_node_name(bs);
             }
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 6ab4265eb4..fa3617db2d 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1010,7 +1010,8 @@ void qdev_prop_set_string(DeviceState *dev, const char *name, const char *value)
     object_property_set_str(OBJECT(dev), value, name, &error_abort);
 }
 
-void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value)
+void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
+                           const uint8_t *value)
 {
     char str[2 * 6 + 5 + 1];
     snprintf(str, sizeof(str), "%02x:%02x:%02x:%02x:%02x:%02x",
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 1e7fb33246..02b632f6b3 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -39,9 +39,9 @@
 #include "qapi-event.h"
 #include "migration/migration.h"
 
-int qdev_hotplug = 0;
+bool qdev_hotplug = false;
 static bool qdev_hot_added = false;
-static bool qdev_hot_removed = false;
+bool qdev_hot_removed = false;
 
 const VMStateDescription *qdev_get_vmsd(DeviceState *dev)
 {
@@ -271,40 +271,6 @@ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev)
     return hotplug_ctrl;
 }
 
-void qdev_unplug(DeviceState *dev, Error **errp)
-{
-    DeviceClass *dc = DEVICE_GET_CLASS(dev);
-    HotplugHandler *hotplug_ctrl;
-    HotplugHandlerClass *hdc;
-
-    if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
-        error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name);
-        return;
-    }
-
-    if (!dc->hotpluggable) {
-        error_setg(errp, QERR_DEVICE_NO_HOTPLUG,
-                   object_get_typename(OBJECT(dev)));
-        return;
-    }
-
-    qdev_hot_removed = true;
-
-    hotplug_ctrl = qdev_get_hotplug_handler(dev);
-    /* hotpluggable device MUST have HotplugHandler, if it doesn't
-     * then something is very wrong with it */
-    g_assert(hotplug_ctrl);
-
-    /* If device supports async unplug just request it to be done,
-     * otherwise just remove it synchronously */
-    hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
-    if (hdc->unplug_request) {
-        hotplug_handler_unplug_request(hotplug_ctrl, dev, errp);
-    } else {
-        hotplug_handler_unplug(hotplug_ctrl, dev, errp);
-    }
-}
-
 static int qdev_reset_one(DeviceState *dev, void *opaque)
 {
     device_reset(dev);
@@ -385,7 +351,7 @@ void qdev_machine_creation_done(void)
      * ok, initial machine setup is done, starting from now we can
      * only create hotpluggable devices
      */
-    qdev_hotplug = 1;
+    qdev_hotplug = true;
 }
 
 bool qdev_machine_modified(void)
@@ -1037,13 +1003,6 @@ static bool device_get_hotplugged(Object *obj, Error **err)
     return dev->hotplugged;
 }
 
-static void device_set_hotplugged(Object *obj, bool value, Error **err)
-{
-    DeviceState *dev = DEVICE(obj);
-
-    dev->hotplugged = value;
-}
-
 static void device_initfn(Object *obj)
 {
     DeviceState *dev = DEVICE(obj);
@@ -1063,7 +1022,7 @@ static void device_initfn(Object *obj)
     object_property_add_bool(obj, "hotpluggable",
                              device_get_hotpluggable, NULL, NULL);
     object_property_add_bool(obj, "hotplugged",
-                             device_get_hotplugged, device_set_hotplugged,
+                             device_get_hotplugged, NULL,
                              &error_abort);
 
     class = object_get_class(OBJECT(dev));
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 1174220394..03d9197f71 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -26,7 +26,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h"
 #include "qemu/error-report.h"
 #include "ui/console.h"
 #include "hw/sysbus.h"
@@ -114,8 +113,8 @@ static void cg3_update_display(void *opaque)
     for (y = 0; y < height; y++) {
         int update = s->full_update;
 
-        page = (y * width) & TARGET_PAGE_MASK;
-        update |= memory_region_get_dirty(&s->vram_mem, page, page + width,
+        page = y * width;
+        update |= memory_region_get_dirty(&s->vram_mem, page, width,
                                           DIRTY_MEMORY_VGA);
         if (update) {
             if (y_start < 0) {
@@ -148,8 +147,7 @@ static void cg3_update_display(void *opaque)
     }
     if (page_max >= page_min) {
         memory_region_reset_dirty(&s->vram_mem,
-                              page_min, page_max - page_min + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
+                              page_min, page_max - page_min, DIRTY_MEMORY_VGA);
     }
     /* vsync interrupt? */
     if (s->regs[0] & CG3_CR_ENABLE_INTS) {
@@ -305,8 +303,7 @@ static void cg3_realizefn(DeviceState *dev, Error **errp)
     vmstate_register_ram_global(&s->rom);
     fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, CG3_ROM_FILE);
     if (fcode_filename) {
-        ret = load_image_targphys(fcode_filename, s->prom_addr,
-                                  FCODE_MAX_ROM_SIZE);
+        ret = load_image_mr(fcode_filename, &s->rom);
         g_free(fcode_filename);
         if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
             error_report("cg3: could not load prom '%s'", CG3_ROM_FILE);
@@ -371,7 +368,6 @@ static Property cg3_properties[] = {
     DEFINE_PROP_UINT16("width",        CG3State, width,     -1),
     DEFINE_PROP_UINT16("height",       CG3State, height,    -1),
     DEFINE_PROP_UINT16("depth",        CG3State, depth,     -1),
-    DEFINE_PROP_UINT64("prom-addr",    CG3State, prom_addr, -1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/cirrus_vga_rop2.h b/hw/display/cirrus_vga_rop2.h
index b86bcd6e09..b208b7348a 100644
--- a/hw/display/cirrus_vga_rop2.h
+++ b/hw/display/cirrus_vga_rop2.h
@@ -29,8 +29,8 @@
 #elif DEPTH == 24
 #define PUTPIXEL(s, a, c)    do {          \
         ROP_OP(s, a,     c);               \
-        ROP_OP(s, a + 1, (col >> 8));      \
-        ROP_OP(s, a + 2, (col >> 16));     \
+        ROP_OP(s, a + 1, (c >> 8));        \
+        ROP_OP(s, a + 2, (c >> 16));       \
     } while (0)
 #elif DEPTH == 32
 #define PUTPIXEL(s, a, c)    ROP_OP_32(s, a, c)
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index e5be713406..fd0b2bec65 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1263,6 +1263,7 @@ static void exynos4210_fimd_update(void *opaque)
     Exynos4210fimdState *s = (Exynos4210fimdState *)opaque;
     DisplaySurface *surface;
     Exynos4210fimdWindow *w;
+    DirtyBitmapSnapshot *snap;
     int i, line;
     hwaddr fb_line_addr, inc_size;
     int scrn_height;
@@ -1291,10 +1292,12 @@ static void exynos4210_fimd_update(void *opaque)
             memory_region_sync_dirty_bitmap(w->mem_section.mr);
             host_fb_addr = w->host_fb_addr;
             fb_line_addr = w->mem_section.offset_within_region;
+            snap = memory_region_snapshot_and_clear_dirty(w->mem_section.mr,
+                    fb_line_addr, inc_size * scrn_height, DIRTY_MEMORY_VGA);
 
             for (line = 0; line < scrn_height; line++) {
-                is_dirty = memory_region_get_dirty(w->mem_section.mr,
-                            fb_line_addr, scrn_width, DIRTY_MEMORY_VGA);
+                is_dirty = memory_region_snapshot_get_dirty(w->mem_section.mr,
+                            snap, fb_line_addr, scrn_width);
 
                 if (s->invalidate || is_dirty) {
                     if (first_line == -1) {
@@ -1309,9 +1312,7 @@ static void exynos4210_fimd_update(void *opaque)
                 fb_line_addr += inc_size;
                 is_dirty = false;
             }
-            memory_region_reset_dirty(w->mem_section.mr,
-                w->mem_section.offset_within_region,
-                w->fb_len, DIRTY_MEMORY_VGA);
+            g_free(snap);
             blend = true;
         }
     }
diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
index 25aa46c8c7..d7310d25f2 100644
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -67,7 +67,7 @@ void framebuffer_update_display(
     int *first_row, /* Input and output.  */
     int *last_row /* Output only */)
 {
-    hwaddr src_len;
+    DirtyBitmapSnapshot *snap;
     uint8_t *dest;
     uint8_t *src;
     int first, last = 0;
@@ -78,7 +78,6 @@ void framebuffer_update_display(
 
     i = *first_row;
     *first_row = -1;
-    src_len = (hwaddr)src_width * rows;
 
     mem = mem_section->mr;
     if (!mem) {
@@ -102,9 +101,10 @@ void framebuffer_update_display(
     src += i * src_width;
     dest += i * dest_row_pitch;
 
+    snap = memory_region_snapshot_and_clear_dirty(mem, addr, src_width * rows,
+                                                  DIRTY_MEMORY_VGA);
     for (; i < rows; i++) {
-        dirty = memory_region_get_dirty(mem, addr, src_width,
-                                             DIRTY_MEMORY_VGA);
+        dirty = memory_region_snapshot_get_dirty(mem, snap, addr, src_width);
         if (dirty || invalidate) {
             fn(opaque, dest, src, cols, dest_col_pitch);
             if (first == -1)
@@ -115,11 +115,10 @@ void framebuffer_update_display(
         src += src_width;
         dest += dest_row_pitch;
     }
+    g_free(snap);
     if (first < 0) {
         return;
     }
-    memory_region_reset_dirty(mem, mem_section->offset_within_region, src_len,
-                              DIRTY_MEMORY_VGA);
     *first_row = first;
     *last_row = last;
 }
diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c
index 8cdc205dd9..86557d14a9 100644
--- a/hw/display/g364fb.c
+++ b/hw/display/g364fb.c
@@ -64,17 +64,8 @@ typedef struct G364State {
 
 static inline int check_dirty(G364State *s, ram_addr_t page)
 {
-    return memory_region_get_dirty(&s->mem_vram, page, G364_PAGE_SIZE,
-                                   DIRTY_MEMORY_VGA);
-}
-
-static inline void reset_dirty(G364State *s,
-                               ram_addr_t page_min, ram_addr_t page_max)
-{
-    memory_region_reset_dirty(&s->mem_vram,
-                              page_min,
-                              page_max + G364_PAGE_SIZE - page_min - 1,
-                              DIRTY_MEMORY_VGA);
+    return memory_region_test_and_clear_dirty(&s->mem_vram, page, G364_PAGE_SIZE,
+                                              DIRTY_MEMORY_VGA);
 }
 
 static void g364fb_draw_graphic8(G364State *s)
@@ -83,7 +74,7 @@ static void g364fb_draw_graphic8(G364State *s)
     int i, w;
     uint8_t *vram;
     uint8_t *data_display, *dd;
-    ram_addr_t page, page_min, page_max;
+    ram_addr_t page;
     int x, y;
     int xmin, xmax;
     int ymin, ymax;
@@ -114,8 +105,6 @@ static void g364fb_draw_graphic8(G364State *s)
     }
 
     page = 0;
-    page_min = (ram_addr_t)-1;
-    page_max = 0;
 
     x = y = 0;
     xmin = s->width;
@@ -137,9 +126,6 @@ static void g364fb_draw_graphic8(G364State *s)
         if (check_dirty(s, page)) {
             if (y < ymin)
                 ymin = ymax = y;
-            if (page_min == (ram_addr_t)-1)
-                page_min = page;
-            page_max = page;
             if (x < xmin)
                 xmin = x;
             for (i = 0; i < G364_PAGE_SIZE; i++) {
@@ -196,10 +182,7 @@ static void g364fb_draw_graphic8(G364State *s)
                 ymax = y;
         } else {
             int dy;
-            if (page_min != (ram_addr_t)-1) {
-                reset_dirty(s, page_min, page_max);
-                page_min = (ram_addr_t)-1;
-                page_max = 0;
+            if (xmax || ymax) {
                 dpy_gfx_update(s->con, xmin, ymin,
                                xmax - xmin + 1, ymax - ymin + 1);
                 xmin = s->width;
@@ -219,9 +202,8 @@ static void g364fb_draw_graphic8(G364State *s)
     }
 
 done:
-    if (page_min != (ram_addr_t)-1) {
+    if (xmax || ymax) {
         dpy_gfx_update(s->con, xmin, ymin, xmax - xmin + 1, ymax - ymin + 1);
-        reset_dirty(s, page_min, page_max);
     }
 }
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 0d02f0efe6..4d94cecd72 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -26,6 +26,7 @@
 #include "qemu/queue.h"
 #include "qemu/atomic.h"
 #include "sysemu/sysemu.h"
+#include "migration/migration.h"
 #include "trace.h"
 
 #include "qxl.h"
@@ -304,6 +305,16 @@ void qxl_spice_reset_cursor(PCIQXLDevice *qxl)
     qxl->ssd.cursor = cursor_builtin_hidden();
 }
 
+static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
+{
+    /*
+     * zlib xors the seed with 0xffffffff, and xors the result
+     * again with 0xffffffff; Both are not done with linux's crc32,
+     * which we want to be compatible with, so undo that.
+     */
+    return crc32(0xffffffff, p, len) ^ 0xffffffff;
+}
+
 static ram_addr_t qxl_rom_size(void)
 {
 #define QXL_REQUIRED_SZ (sizeof(QXLRom) + sizeof(QXLModes) + sizeof(qxl_modes))
@@ -368,6 +379,18 @@ static void init_qxl_rom(PCIQXLDevice *d)
     rom->num_pages          = cpu_to_le32(num_pages);
     rom->ram_header_offset  = cpu_to_le32(d->vga.vram_size - ram_header_size);
 
+    if (d->xres && d->yres) {
+        /* needs linux kernel 4.12+ to work */
+        rom->client_monitors_config.count = 1;
+        rom->client_monitors_config.heads[0].left = 0;
+        rom->client_monitors_config.heads[0].top = 0;
+        rom->client_monitors_config.heads[0].right = cpu_to_le32(d->xres);
+        rom->client_monitors_config.heads[0].bottom = cpu_to_le32(d->yres);
+        rom->client_monitors_config_crc = qxl_crc32(
+            (const uint8_t *)&rom->client_monitors_config,
+            sizeof(rom->client_monitors_config));
+    }
+
     d->shadow_rom = *rom;
     d->rom        = rom;
     d->modes      = modes;
@@ -639,6 +662,30 @@ static int interface_get_command(QXLInstance *sin, struct QXLCommandExt *ext)
         qxl->guest_primary.commands++;
         qxl_track_command(qxl, ext);
         qxl_log_command(qxl, "cmd", ext);
+        {
+            /*
+             * Windows 8 drivers place qxl commands in the vram
+             * (instead of the ram) bar.  We can't live migrate such a
+             * guest, so add a migration blocker in case we detect
+             * this, to avoid triggering the assert in pre_save().
+             *
+             * https://cgit.freedesktop.org/spice/win32/qxl-wddm-dod/commit/?id=f6e099db39e7d0787f294d5fd0dce328b5210faa
+             */
+            void *msg = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id);
+            if (msg != NULL && (
+                    msg < (void *)qxl->vga.vram_ptr ||
+                    msg > ((void *)qxl->vga.vram_ptr + qxl->vga.vram_size))) {
+                if (!qxl->migration_blocker) {
+                    Error *local_err = NULL;
+                    error_setg(&qxl->migration_blocker,
+                               "qxl: guest bug: command not in ram bar");
+                    migrate_add_blocker(qxl->migration_blocker, &local_err);
+                    if (local_err) {
+                        error_report_err(local_err);
+                    }
+                }
+            }
+        }
         trace_qxl_ring_command_get(qxl->id, qxl_mode_to_string(qxl->mode));
         return true;
     default:
@@ -986,16 +1033,6 @@ static void interface_set_client_capabilities(QXLInstance *sin,
     qxl_send_events(qxl, QXL_INTERRUPT_CLIENT);
 }
 
-static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
-{
-    /*
-     * zlib xors the seed with 0xffffffff, and xors the result
-     * again with 0xffffffff; Both are not done with linux's crc32,
-     * which we want to be compatible with, so undo that.
-     */
-    return crc32(0xffffffff, p, len) ^ 0xffffffff;
-}
-
 static bool qxl_rom_monitors_config_changed(QXLRom *rom,
         VDAgentMonitorsConfig *monitors_config,
         unsigned int max_outputs)
@@ -1146,6 +1183,7 @@ static void qxl_enter_vga_mode(PCIQXLDevice *d)
     update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_DEFAULT);
     qemu_spice_create_host_primary(&d->ssd);
     d->mode = QXL_MODE_VGA;
+    qemu_spice_display_switch(&d->ssd, d->ssd.ds);
     vga_dirty_log_start(&d->vga);
     graphic_hw_update(d->vga.con);
 }
@@ -1235,6 +1273,12 @@ static void qxl_hard_reset(PCIQXLDevice *d, int loadvm)
     qemu_spice_create_host_memslot(&d->ssd);
     qxl_soft_reset(d);
 
+    if (d->migration_blocker) {
+        migrate_del_blocker(d->migration_blocker);
+        error_free(d->migration_blocker);
+        d->migration_blocker = NULL;
+    }
+
     if (startstop) {
         qemu_spice_display_start();
     }
@@ -2365,6 +2409,8 @@ static Property qxl_properties[] = {
 #if SPICE_SERVER_VERSION >= 0x000c06 /* release 0.12.6 */
         DEFINE_PROP_UINT16("max_outputs", PCIQXLDevice, max_outputs, 0),
 #endif
+        DEFINE_PROP_UINT32("xres", PCIQXLDevice, xres, 0),
+        DEFINE_PROP_UINT32("yres", PCIQXLDevice, yres, 0),
         DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/qxl.h b/hw/display/qxl.h
index d2d49dd933..f6556adb73 100644
--- a/hw/display/qxl.h
+++ b/hw/display/qxl.h
@@ -40,6 +40,7 @@ typedef struct PCIQXLDevice {
     uint32_t           cmdlog;
 
     uint32_t           guest_bug;
+    Error              *migration_blocker;
 
     enum qxl_mode      mode;
     uint32_t           cmdflags;
@@ -118,6 +119,8 @@ typedef struct PCIQXLDevice {
     uint32_t          vram_size_mb;
     uint32_t          vram32_size_mb;
     uint32_t          vgamem_size_mb;
+    uint32_t          xres;
+    uint32_t          yres;
 
     /* qxl_render_update state */
     int                render_update_cookie_num;
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 040a0b93f2..2094adbc9c 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -2,6 +2,7 @@
  * QEMU SM501 Device
  *
  * Copyright (c) 2008 Shin-ichiro KAWASAKI
+ * Copyright (c) 2016 BALATON Zoltan
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -23,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "cpu.h"
@@ -31,6 +33,7 @@
 #include "ui/console.h"
 #include "hw/devices.h"
 #include "hw/sysbus.h"
+#include "hw/pci/pci.h"
 #include "qemu/range.h"
 #include "ui/pixel_ops.h"
 #include "exec/address-spaces.h"
@@ -38,10 +41,13 @@
 /*
  * Status: 2010/05/07
  *   - Minimum implementation for Linux console : mmio regs and CRT layer.
- *   - 2D grapihcs acceleration partially supported : only fill rectangle.
+ *   - 2D graphics acceleration partially supported : only fill rectangle.
  *
- * TODO:
+ * Status: 2016/12/04
+ *   - Misc fixes: endianness, hardware cursor
  *   - Panel support
+ *
+ * TODO:
  *   - Touch panel support
  *   - USB support
  *   - UART support
@@ -49,395 +55,396 @@
  *   - Performance tuning
  */
 
-//#define DEBUG_SM501
-//#define DEBUG_BITBLT
+/*#define DEBUG_SM501*/
+/*#define DEBUG_BITBLT*/
 
 #ifdef DEBUG_SM501
 #define SM501_DPRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
 #else
-#define SM501_DPRINTF(fmt, ...) do {} while(0)
+#define SM501_DPRINTF(fmt, ...) do {} while (0)
 #endif
 
-
 #define MMIO_BASE_OFFSET 0x3e00000
+#define MMIO_SIZE 0x200000
+#define DC_PALETTE_ENTRIES (0x400 * 3)
 
 /* SM501 register definitions taken from "linux/include/linux/sm501-regs.h" */
 
 /* System Configuration area */
 /* System config base */
-#define SM501_SYS_CONFIG		(0x000000)
+#define SM501_SYS_CONFIG                (0x000000)
 
 /* config 1 */
-#define SM501_SYSTEM_CONTROL 		(0x000000)
+#define SM501_SYSTEM_CONTROL            (0x000000)
 
-#define SM501_SYSCTRL_PANEL_TRISTATE	(1<<0)
-#define SM501_SYSCTRL_MEM_TRISTATE	(1<<1)
-#define SM501_SYSCTRL_CRT_TRISTATE	(1<<2)
+#define SM501_SYSCTRL_PANEL_TRISTATE    (1 << 0)
+#define SM501_SYSCTRL_MEM_TRISTATE      (1 << 1)
+#define SM501_SYSCTRL_CRT_TRISTATE      (1 << 2)
 
-#define SM501_SYSCTRL_PCI_SLAVE_BURST_MASK (3<<4)
-#define SM501_SYSCTRL_PCI_SLAVE_BURST_1	(0<<4)
-#define SM501_SYSCTRL_PCI_SLAVE_BURST_2	(1<<4)
-#define SM501_SYSCTRL_PCI_SLAVE_BURST_4	(2<<4)
-#define SM501_SYSCTRL_PCI_SLAVE_BURST_8	(3<<4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_MASK (3 << 4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_1 (0 << 4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_2 (1 << 4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_4 (2 << 4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_8 (3 << 4)
 
-#define SM501_SYSCTRL_PCI_CLOCK_RUN_EN	(1<<6)
-#define SM501_SYSCTRL_PCI_RETRY_DISABLE	(1<<7)
-#define SM501_SYSCTRL_PCI_SUBSYS_LOCK	(1<<11)
-#define SM501_SYSCTRL_PCI_BURST_READ_EN	(1<<15)
+#define SM501_SYSCTRL_PCI_CLOCK_RUN_EN  (1 << 6)
+#define SM501_SYSCTRL_PCI_RETRY_DISABLE (1 << 7)
+#define SM501_SYSCTRL_PCI_SUBSYS_LOCK   (1 << 11)
+#define SM501_SYSCTRL_PCI_BURST_READ_EN (1 << 15)
 
 /* miscellaneous control */
 
-#define SM501_MISC_CONTROL		(0x000004)
+#define SM501_MISC_CONTROL              (0x000004)
 
-#define SM501_MISC_BUS_SH		(0x0)
-#define SM501_MISC_BUS_PCI		(0x1)
-#define SM501_MISC_BUS_XSCALE		(0x2)
-#define SM501_MISC_BUS_NEC		(0x6)
-#define SM501_MISC_BUS_MASK		(0x7)
+#define SM501_MISC_BUS_SH               (0x0)
+#define SM501_MISC_BUS_PCI              (0x1)
+#define SM501_MISC_BUS_XSCALE           (0x2)
+#define SM501_MISC_BUS_NEC              (0x6)
+#define SM501_MISC_BUS_MASK             (0x7)
 
-#define SM501_MISC_VR_62MB		(1<<3)
-#define SM501_MISC_CDR_RESET		(1<<7)
-#define SM501_MISC_USB_LB		(1<<8)
-#define SM501_MISC_USB_SLAVE		(1<<9)
-#define SM501_MISC_BL_1			(1<<10)
-#define SM501_MISC_MC			(1<<11)
-#define SM501_MISC_DAC_POWER		(1<<12)
-#define SM501_MISC_IRQ_INVERT		(1<<16)
-#define SM501_MISC_SH			(1<<17)
+#define SM501_MISC_VR_62MB              (1 << 3)
+#define SM501_MISC_CDR_RESET            (1 << 7)
+#define SM501_MISC_USB_LB               (1 << 8)
+#define SM501_MISC_USB_SLAVE            (1 << 9)
+#define SM501_MISC_BL_1                 (1 << 10)
+#define SM501_MISC_MC                   (1 << 11)
+#define SM501_MISC_DAC_POWER            (1 << 12)
+#define SM501_MISC_IRQ_INVERT           (1 << 16)
+#define SM501_MISC_SH                   (1 << 17)
 
-#define SM501_MISC_HOLD_EMPTY		(0<<18)
-#define SM501_MISC_HOLD_8		(1<<18)
-#define SM501_MISC_HOLD_16		(2<<18)
-#define SM501_MISC_HOLD_24		(3<<18)
-#define SM501_MISC_HOLD_32		(4<<18)
-#define SM501_MISC_HOLD_MASK		(7<<18)
+#define SM501_MISC_HOLD_EMPTY           (0 << 18)
+#define SM501_MISC_HOLD_8               (1 << 18)
+#define SM501_MISC_HOLD_16              (2 << 18)
+#define SM501_MISC_HOLD_24              (3 << 18)
+#define SM501_MISC_HOLD_32              (4 << 18)
+#define SM501_MISC_HOLD_MASK            (7 << 18)
 
-#define SM501_MISC_FREQ_12		(1<<24)
-#define SM501_MISC_PNL_24BIT		(1<<25)
-#define SM501_MISC_8051_LE		(1<<26)
+#define SM501_MISC_FREQ_12              (1 << 24)
+#define SM501_MISC_PNL_24BIT            (1 << 25)
+#define SM501_MISC_8051_LE              (1 << 26)
 
 
 
-#define SM501_GPIO31_0_CONTROL		(0x000008)
-#define SM501_GPIO63_32_CONTROL		(0x00000C)
-#define SM501_DRAM_CONTROL		(0x000010)
+#define SM501_GPIO31_0_CONTROL          (0x000008)
+#define SM501_GPIO63_32_CONTROL         (0x00000C)
+#define SM501_DRAM_CONTROL              (0x000010)
 
 /* command list */
-#define SM501_ARBTRTN_CONTROL		(0x000014)
+#define SM501_ARBTRTN_CONTROL           (0x000014)
 
 /* command list */
-#define SM501_COMMAND_LIST_STATUS	(0x000024)
+#define SM501_COMMAND_LIST_STATUS       (0x000024)
 
 /* interrupt debug */
-#define SM501_RAW_IRQ_STATUS		(0x000028)
-#define SM501_RAW_IRQ_CLEAR		(0x000028)
-#define SM501_IRQ_STATUS		(0x00002C)
-#define SM501_IRQ_MASK			(0x000030)
-#define SM501_DEBUG_CONTROL		(0x000034)
+#define SM501_RAW_IRQ_STATUS            (0x000028)
+#define SM501_RAW_IRQ_CLEAR             (0x000028)
+#define SM501_IRQ_STATUS                (0x00002C)
+#define SM501_IRQ_MASK                  (0x000030)
+#define SM501_DEBUG_CONTROL             (0x000034)
 
 /* power management */
-#define SM501_POWERMODE_P2X_SRC		(1<<29)
-#define SM501_POWERMODE_V2X_SRC		(1<<20)
-#define SM501_POWERMODE_M_SRC		(1<<12)
-#define SM501_POWERMODE_M1_SRC		(1<<4)
-
-#define SM501_CURRENT_GATE		(0x000038)
-#define SM501_CURRENT_CLOCK		(0x00003C)
-#define SM501_POWER_MODE_0_GATE		(0x000040)
-#define SM501_POWER_MODE_0_CLOCK	(0x000044)
-#define SM501_POWER_MODE_1_GATE		(0x000048)
-#define SM501_POWER_MODE_1_CLOCK	(0x00004C)
-#define SM501_SLEEP_MODE_GATE		(0x000050)
-#define SM501_POWER_MODE_CONTROL	(0x000054)
+#define SM501_POWERMODE_P2X_SRC         (1 << 29)
+#define SM501_POWERMODE_V2X_SRC         (1 << 20)
+#define SM501_POWERMODE_M_SRC           (1 << 12)
+#define SM501_POWERMODE_M1_SRC          (1 << 4)
+
+#define SM501_CURRENT_GATE              (0x000038)
+#define SM501_CURRENT_CLOCK             (0x00003C)
+#define SM501_POWER_MODE_0_GATE         (0x000040)
+#define SM501_POWER_MODE_0_CLOCK        (0x000044)
+#define SM501_POWER_MODE_1_GATE         (0x000048)
+#define SM501_POWER_MODE_1_CLOCK        (0x00004C)
+#define SM501_SLEEP_MODE_GATE           (0x000050)
+#define SM501_POWER_MODE_CONTROL        (0x000054)
 
 /* power gates for units within the 501 */
-#define SM501_GATE_HOST			(0)
-#define SM501_GATE_MEMORY		(1)
-#define SM501_GATE_DISPLAY		(2)
-#define SM501_GATE_2D_ENGINE		(3)
-#define SM501_GATE_CSC			(4)
-#define SM501_GATE_ZVPORT		(5)
-#define SM501_GATE_GPIO			(6)
-#define SM501_GATE_UART0		(7)
-#define SM501_GATE_UART1		(8)
-#define SM501_GATE_SSP			(10)
-#define SM501_GATE_USB_HOST		(11)
-#define SM501_GATE_USB_GADGET		(12)
-#define SM501_GATE_UCONTROLLER		(17)
-#define SM501_GATE_AC97			(18)
+#define SM501_GATE_HOST                 (0)
+#define SM501_GATE_MEMORY               (1)
+#define SM501_GATE_DISPLAY              (2)
+#define SM501_GATE_2D_ENGINE            (3)
+#define SM501_GATE_CSC                  (4)
+#define SM501_GATE_ZVPORT               (5)
+#define SM501_GATE_GPIO                 (6)
+#define SM501_GATE_UART0                (7)
+#define SM501_GATE_UART1                (8)
+#define SM501_GATE_SSP                  (10)
+#define SM501_GATE_USB_HOST             (11)
+#define SM501_GATE_USB_GADGET           (12)
+#define SM501_GATE_UCONTROLLER          (17)
+#define SM501_GATE_AC97                 (18)
 
 /* panel clock */
-#define SM501_CLOCK_P2XCLK		(24)
+#define SM501_CLOCK_P2XCLK              (24)
 /* crt clock */
-#define SM501_CLOCK_V2XCLK		(16)
+#define SM501_CLOCK_V2XCLK              (16)
 /* main clock */
-#define SM501_CLOCK_MCLK		(8)
+#define SM501_CLOCK_MCLK                (8)
 /* SDRAM controller clock */
-#define SM501_CLOCK_M1XCLK		(0)
+#define SM501_CLOCK_M1XCLK              (0)
 
 /* config 2 */
-#define SM501_PCI_MASTER_BASE		(0x000058)
-#define SM501_ENDIAN_CONTROL		(0x00005C)
-#define SM501_DEVICEID			(0x000060)
+#define SM501_PCI_MASTER_BASE           (0x000058)
+#define SM501_ENDIAN_CONTROL            (0x00005C)
+#define SM501_DEVICEID                  (0x000060)
 /* 0x050100A0 */
 
-#define SM501_DEVICEID_SM501		(0x05010000)
-#define SM501_DEVICEID_IDMASK		(0xffff0000)
-#define SM501_DEVICEID_REVMASK		(0x000000ff)
+#define SM501_DEVICEID_SM501            (0x05010000)
+#define SM501_DEVICEID_IDMASK           (0xffff0000)
+#define SM501_DEVICEID_REVMASK          (0x000000ff)
 
-#define SM501_PLLCLOCK_COUNT		(0x000064)
-#define SM501_MISC_TIMING		(0x000068)
-#define SM501_CURRENT_SDRAM_CLOCK	(0x00006C)
+#define SM501_PLLCLOCK_COUNT            (0x000064)
+#define SM501_MISC_TIMING               (0x000068)
+#define SM501_CURRENT_SDRAM_CLOCK       (0x00006C)
 
-#define SM501_PROGRAMMABLE_PLL_CONTROL	(0x000074)
+#define SM501_PROGRAMMABLE_PLL_CONTROL  (0x000074)
 
 /* GPIO base */
-#define SM501_GPIO			(0x010000)
-#define SM501_GPIO_DATA_LOW		(0x00)
-#define SM501_GPIO_DATA_HIGH		(0x04)
-#define SM501_GPIO_DDR_LOW		(0x08)
-#define SM501_GPIO_DDR_HIGH		(0x0C)
-#define SM501_GPIO_IRQ_SETUP		(0x10)
-#define SM501_GPIO_IRQ_STATUS		(0x14)
-#define SM501_GPIO_IRQ_RESET		(0x14)
+#define SM501_GPIO                      (0x010000)
+#define SM501_GPIO_DATA_LOW             (0x00)
+#define SM501_GPIO_DATA_HIGH            (0x04)
+#define SM501_GPIO_DDR_LOW              (0x08)
+#define SM501_GPIO_DDR_HIGH             (0x0C)
+#define SM501_GPIO_IRQ_SETUP            (0x10)
+#define SM501_GPIO_IRQ_STATUS           (0x14)
+#define SM501_GPIO_IRQ_RESET            (0x14)
 
 /* I2C controller base */
-#define SM501_I2C			(0x010040)
-#define SM501_I2C_BYTE_COUNT		(0x00)
-#define SM501_I2C_CONTROL		(0x01)
-#define SM501_I2C_STATUS		(0x02)
-#define SM501_I2C_RESET			(0x02)
-#define SM501_I2C_SLAVE_ADDRESS		(0x03)
-#define SM501_I2C_DATA			(0x04)
+#define SM501_I2C                       (0x010040)
+#define SM501_I2C_BYTE_COUNT            (0x00)
+#define SM501_I2C_CONTROL               (0x01)
+#define SM501_I2C_STATUS                (0x02)
+#define SM501_I2C_RESET                 (0x02)
+#define SM501_I2C_SLAVE_ADDRESS         (0x03)
+#define SM501_I2C_DATA                  (0x04)
 
 /* SSP base */
-#define SM501_SSP			(0x020000)
+#define SM501_SSP                       (0x020000)
 
 /* Uart 0 base */
-#define SM501_UART0			(0x030000)
+#define SM501_UART0                     (0x030000)
 
 /* Uart 1 base */
-#define SM501_UART1			(0x030020)
+#define SM501_UART1                     (0x030020)
 
 /* USB host port base */
-#define SM501_USB_HOST			(0x040000)
+#define SM501_USB_HOST                  (0x040000)
 
 /* USB slave/gadget base */
-#define SM501_USB_GADGET		(0x060000)
+#define SM501_USB_GADGET                (0x060000)
 
 /* USB slave/gadget data port base */
-#define SM501_USB_GADGET_DATA		(0x070000)
+#define SM501_USB_GADGET_DATA           (0x070000)
 
 /* Display controller/video engine base */
-#define SM501_DC			(0x080000)
+#define SM501_DC                        (0x080000)
 
 /* common defines for the SM501 address registers */
-#define SM501_ADDR_FLIP			(1<<31)
-#define SM501_ADDR_EXT			(1<<27)
-#define SM501_ADDR_CS1			(1<<26)
-#define SM501_ADDR_MASK			(0x3f << 26)
+#define SM501_ADDR_FLIP                 (1 << 31)
+#define SM501_ADDR_EXT                  (1 << 27)
+#define SM501_ADDR_CS1                  (1 << 26)
+#define SM501_ADDR_MASK                 (0x3f << 26)
 
-#define SM501_FIFO_MASK			(0x3 << 16)
-#define SM501_FIFO_1			(0x0 << 16)
-#define SM501_FIFO_3			(0x1 << 16)
-#define SM501_FIFO_7			(0x2 << 16)
-#define SM501_FIFO_11			(0x3 << 16)
+#define SM501_FIFO_MASK                 (0x3 << 16)
+#define SM501_FIFO_1                    (0x0 << 16)
+#define SM501_FIFO_3                    (0x1 << 16)
+#define SM501_FIFO_7                    (0x2 << 16)
+#define SM501_FIFO_11                   (0x3 << 16)
 
 /* common registers for panel and the crt */
-#define SM501_OFF_DC_H_TOT		(0x000)
-#define SM501_OFF_DC_V_TOT		(0x008)
-#define SM501_OFF_DC_H_SYNC		(0x004)
-#define SM501_OFF_DC_V_SYNC		(0x00C)
-
-#define SM501_DC_PANEL_CONTROL		(0x000)
-
-#define SM501_DC_PANEL_CONTROL_FPEN	(1<<27)
-#define SM501_DC_PANEL_CONTROL_BIAS	(1<<26)
-#define SM501_DC_PANEL_CONTROL_DATA	(1<<25)
-#define SM501_DC_PANEL_CONTROL_VDD	(1<<24)
-#define SM501_DC_PANEL_CONTROL_DP	(1<<23)
-
-#define SM501_DC_PANEL_CONTROL_TFT_888	(0<<21)
-#define SM501_DC_PANEL_CONTROL_TFT_333	(1<<21)
-#define SM501_DC_PANEL_CONTROL_TFT_444	(2<<21)
-
-#define SM501_DC_PANEL_CONTROL_DE	(1<<20)
-
-#define SM501_DC_PANEL_CONTROL_LCD_TFT	(0<<18)
-#define SM501_DC_PANEL_CONTROL_LCD_STN8	(1<<18)
-#define SM501_DC_PANEL_CONTROL_LCD_STN12 (2<<18)
-
-#define SM501_DC_PANEL_CONTROL_CP	(1<<14)
-#define SM501_DC_PANEL_CONTROL_VSP	(1<<13)
-#define SM501_DC_PANEL_CONTROL_HSP	(1<<12)
-#define SM501_DC_PANEL_CONTROL_CK	(1<<9)
-#define SM501_DC_PANEL_CONTROL_TE	(1<<8)
-#define SM501_DC_PANEL_CONTROL_VPD	(1<<7)
-#define SM501_DC_PANEL_CONTROL_VP	(1<<6)
-#define SM501_DC_PANEL_CONTROL_HPD	(1<<5)
-#define SM501_DC_PANEL_CONTROL_HP	(1<<4)
-#define SM501_DC_PANEL_CONTROL_GAMMA	(1<<3)
-#define SM501_DC_PANEL_CONTROL_EN	(1<<2)
-
-#define SM501_DC_PANEL_CONTROL_8BPP	(0<<0)
-#define SM501_DC_PANEL_CONTROL_16BPP	(1<<0)
-#define SM501_DC_PANEL_CONTROL_32BPP	(2<<0)
-
-
-#define SM501_DC_PANEL_PANNING_CONTROL	(0x004)
-#define SM501_DC_PANEL_COLOR_KEY	(0x008)
-#define SM501_DC_PANEL_FB_ADDR		(0x00C)
-#define SM501_DC_PANEL_FB_OFFSET	(0x010)
-#define SM501_DC_PANEL_FB_WIDTH		(0x014)
-#define SM501_DC_PANEL_FB_HEIGHT	(0x018)
-#define SM501_DC_PANEL_TL_LOC		(0x01C)
-#define SM501_DC_PANEL_BR_LOC		(0x020)
-#define SM501_DC_PANEL_H_TOT		(0x024)
-#define SM501_DC_PANEL_H_SYNC		(0x028)
-#define SM501_DC_PANEL_V_TOT		(0x02C)
-#define SM501_DC_PANEL_V_SYNC		(0x030)
-#define SM501_DC_PANEL_CUR_LINE		(0x034)
-
-#define SM501_DC_VIDEO_CONTROL		(0x040)
-#define SM501_DC_VIDEO_FB0_ADDR		(0x044)
-#define SM501_DC_VIDEO_FB_WIDTH		(0x048)
-#define SM501_DC_VIDEO_FB0_LAST_ADDR	(0x04C)
-#define SM501_DC_VIDEO_TL_LOC		(0x050)
-#define SM501_DC_VIDEO_BR_LOC		(0x054)
-#define SM501_DC_VIDEO_SCALE		(0x058)
-#define SM501_DC_VIDEO_INIT_SCALE	(0x05C)
-#define SM501_DC_VIDEO_YUV_CONSTANTS	(0x060)
-#define SM501_DC_VIDEO_FB1_ADDR		(0x064)
-#define SM501_DC_VIDEO_FB1_LAST_ADDR	(0x068)
-
-#define SM501_DC_VIDEO_ALPHA_CONTROL	(0x080)
-#define SM501_DC_VIDEO_ALPHA_FB_ADDR	(0x084)
-#define SM501_DC_VIDEO_ALPHA_FB_OFFSET	(0x088)
-#define SM501_DC_VIDEO_ALPHA_FB_LAST_ADDR	(0x08C)
-#define SM501_DC_VIDEO_ALPHA_TL_LOC	(0x090)
-#define SM501_DC_VIDEO_ALPHA_BR_LOC	(0x094)
-#define SM501_DC_VIDEO_ALPHA_SCALE	(0x098)
-#define SM501_DC_VIDEO_ALPHA_INIT_SCALE	(0x09C)
-#define SM501_DC_VIDEO_ALPHA_CHROMA_KEY	(0x0A0)
-#define SM501_DC_VIDEO_ALPHA_COLOR_LOOKUP	(0x0A4)
-
-#define SM501_DC_PANEL_HWC_BASE		(0x0F0)
-#define SM501_DC_PANEL_HWC_ADDR		(0x0F0)
-#define SM501_DC_PANEL_HWC_LOC		(0x0F4)
-#define SM501_DC_PANEL_HWC_COLOR_1_2	(0x0F8)
-#define SM501_DC_PANEL_HWC_COLOR_3	(0x0FC)
-
-#define SM501_HWC_EN			(1<<31)
-
-#define SM501_OFF_HWC_ADDR		(0x00)
-#define SM501_OFF_HWC_LOC		(0x04)
-#define SM501_OFF_HWC_COLOR_1_2		(0x08)
-#define SM501_OFF_HWC_COLOR_3		(0x0C)
-
-#define SM501_DC_ALPHA_CONTROL		(0x100)
-#define SM501_DC_ALPHA_FB_ADDR		(0x104)
-#define SM501_DC_ALPHA_FB_OFFSET	(0x108)
-#define SM501_DC_ALPHA_TL_LOC		(0x10C)
-#define SM501_DC_ALPHA_BR_LOC		(0x110)
-#define SM501_DC_ALPHA_CHROMA_KEY	(0x114)
-#define SM501_DC_ALPHA_COLOR_LOOKUP	(0x118)
-
-#define SM501_DC_CRT_CONTROL		(0x200)
-
-#define SM501_DC_CRT_CONTROL_TVP	(1<<15)
-#define SM501_DC_CRT_CONTROL_CP		(1<<14)
-#define SM501_DC_CRT_CONTROL_VSP	(1<<13)
-#define SM501_DC_CRT_CONTROL_HSP	(1<<12)
-#define SM501_DC_CRT_CONTROL_VS		(1<<11)
-#define SM501_DC_CRT_CONTROL_BLANK	(1<<10)
-#define SM501_DC_CRT_CONTROL_SEL	(1<<9)
-#define SM501_DC_CRT_CONTROL_TE		(1<<8)
+#define SM501_OFF_DC_H_TOT              (0x000)
+#define SM501_OFF_DC_V_TOT              (0x008)
+#define SM501_OFF_DC_H_SYNC             (0x004)
+#define SM501_OFF_DC_V_SYNC             (0x00C)
+
+#define SM501_DC_PANEL_CONTROL          (0x000)
+
+#define SM501_DC_PANEL_CONTROL_FPEN     (1 << 27)
+#define SM501_DC_PANEL_CONTROL_BIAS     (1 << 26)
+#define SM501_DC_PANEL_CONTROL_DATA     (1 << 25)
+#define SM501_DC_PANEL_CONTROL_VDD      (1 << 24)
+#define SM501_DC_PANEL_CONTROL_DP       (1 << 23)
+
+#define SM501_DC_PANEL_CONTROL_TFT_888  (0 << 21)
+#define SM501_DC_PANEL_CONTROL_TFT_333  (1 << 21)
+#define SM501_DC_PANEL_CONTROL_TFT_444  (2 << 21)
+
+#define SM501_DC_PANEL_CONTROL_DE       (1 << 20)
+
+#define SM501_DC_PANEL_CONTROL_LCD_TFT  (0 << 18)
+#define SM501_DC_PANEL_CONTROL_LCD_STN8 (1 << 18)
+#define SM501_DC_PANEL_CONTROL_LCD_STN12 (2 << 18)
+
+#define SM501_DC_PANEL_CONTROL_CP       (1 << 14)
+#define SM501_DC_PANEL_CONTROL_VSP      (1 << 13)
+#define SM501_DC_PANEL_CONTROL_HSP      (1 << 12)
+#define SM501_DC_PANEL_CONTROL_CK       (1 << 9)
+#define SM501_DC_PANEL_CONTROL_TE       (1 << 8)
+#define SM501_DC_PANEL_CONTROL_VPD      (1 << 7)
+#define SM501_DC_PANEL_CONTROL_VP       (1 << 6)
+#define SM501_DC_PANEL_CONTROL_HPD      (1 << 5)
+#define SM501_DC_PANEL_CONTROL_HP       (1 << 4)
+#define SM501_DC_PANEL_CONTROL_GAMMA    (1 << 3)
+#define SM501_DC_PANEL_CONTROL_EN       (1 << 2)
+
+#define SM501_DC_PANEL_CONTROL_8BPP     (0 << 0)
+#define SM501_DC_PANEL_CONTROL_16BPP    (1 << 0)
+#define SM501_DC_PANEL_CONTROL_32BPP    (2 << 0)
+
+
+#define SM501_DC_PANEL_PANNING_CONTROL  (0x004)
+#define SM501_DC_PANEL_COLOR_KEY        (0x008)
+#define SM501_DC_PANEL_FB_ADDR          (0x00C)
+#define SM501_DC_PANEL_FB_OFFSET        (0x010)
+#define SM501_DC_PANEL_FB_WIDTH         (0x014)
+#define SM501_DC_PANEL_FB_HEIGHT        (0x018)
+#define SM501_DC_PANEL_TL_LOC           (0x01C)
+#define SM501_DC_PANEL_BR_LOC           (0x020)
+#define SM501_DC_PANEL_H_TOT            (0x024)
+#define SM501_DC_PANEL_H_SYNC           (0x028)
+#define SM501_DC_PANEL_V_TOT            (0x02C)
+#define SM501_DC_PANEL_V_SYNC           (0x030)
+#define SM501_DC_PANEL_CUR_LINE         (0x034)
+
+#define SM501_DC_VIDEO_CONTROL          (0x040)
+#define SM501_DC_VIDEO_FB0_ADDR         (0x044)
+#define SM501_DC_VIDEO_FB_WIDTH         (0x048)
+#define SM501_DC_VIDEO_FB0_LAST_ADDR    (0x04C)
+#define SM501_DC_VIDEO_TL_LOC           (0x050)
+#define SM501_DC_VIDEO_BR_LOC           (0x054)
+#define SM501_DC_VIDEO_SCALE            (0x058)
+#define SM501_DC_VIDEO_INIT_SCALE       (0x05C)
+#define SM501_DC_VIDEO_YUV_CONSTANTS    (0x060)
+#define SM501_DC_VIDEO_FB1_ADDR         (0x064)
+#define SM501_DC_VIDEO_FB1_LAST_ADDR    (0x068)
+
+#define SM501_DC_VIDEO_ALPHA_CONTROL    (0x080)
+#define SM501_DC_VIDEO_ALPHA_FB_ADDR    (0x084)
+#define SM501_DC_VIDEO_ALPHA_FB_OFFSET  (0x088)
+#define SM501_DC_VIDEO_ALPHA_FB_LAST_ADDR (0x08C)
+#define SM501_DC_VIDEO_ALPHA_TL_LOC     (0x090)
+#define SM501_DC_VIDEO_ALPHA_BR_LOC     (0x094)
+#define SM501_DC_VIDEO_ALPHA_SCALE      (0x098)
+#define SM501_DC_VIDEO_ALPHA_INIT_SCALE (0x09C)
+#define SM501_DC_VIDEO_ALPHA_CHROMA_KEY (0x0A0)
+#define SM501_DC_VIDEO_ALPHA_COLOR_LOOKUP (0x0A4)
+
+#define SM501_DC_PANEL_HWC_BASE         (0x0F0)
+#define SM501_DC_PANEL_HWC_ADDR         (0x0F0)
+#define SM501_DC_PANEL_HWC_LOC          (0x0F4)
+#define SM501_DC_PANEL_HWC_COLOR_1_2    (0x0F8)
+#define SM501_DC_PANEL_HWC_COLOR_3      (0x0FC)
+
+#define SM501_HWC_EN                    (1 << 31)
+
+#define SM501_OFF_HWC_ADDR              (0x00)
+#define SM501_OFF_HWC_LOC               (0x04)
+#define SM501_OFF_HWC_COLOR_1_2         (0x08)
+#define SM501_OFF_HWC_COLOR_3           (0x0C)
+
+#define SM501_DC_ALPHA_CONTROL          (0x100)
+#define SM501_DC_ALPHA_FB_ADDR          (0x104)
+#define SM501_DC_ALPHA_FB_OFFSET        (0x108)
+#define SM501_DC_ALPHA_TL_LOC           (0x10C)
+#define SM501_DC_ALPHA_BR_LOC           (0x110)
+#define SM501_DC_ALPHA_CHROMA_KEY       (0x114)
+#define SM501_DC_ALPHA_COLOR_LOOKUP     (0x118)
+
+#define SM501_DC_CRT_CONTROL            (0x200)
+
+#define SM501_DC_CRT_CONTROL_TVP        (1 << 15)
+#define SM501_DC_CRT_CONTROL_CP         (1 << 14)
+#define SM501_DC_CRT_CONTROL_VSP        (1 << 13)
+#define SM501_DC_CRT_CONTROL_HSP        (1 << 12)
+#define SM501_DC_CRT_CONTROL_VS         (1 << 11)
+#define SM501_DC_CRT_CONTROL_BLANK      (1 << 10)
+#define SM501_DC_CRT_CONTROL_SEL        (1 << 9)
+#define SM501_DC_CRT_CONTROL_TE         (1 << 8)
 #define SM501_DC_CRT_CONTROL_PIXEL_MASK (0xF << 4)
-#define SM501_DC_CRT_CONTROL_GAMMA	(1<<3)
-#define SM501_DC_CRT_CONTROL_ENABLE	(1<<2)
+#define SM501_DC_CRT_CONTROL_GAMMA      (1 << 3)
+#define SM501_DC_CRT_CONTROL_ENABLE     (1 << 2)
 
-#define SM501_DC_CRT_CONTROL_8BPP	(0<<0)
-#define SM501_DC_CRT_CONTROL_16BPP	(1<<0)
-#define SM501_DC_CRT_CONTROL_32BPP	(2<<0)
+#define SM501_DC_CRT_CONTROL_8BPP       (0 << 0)
+#define SM501_DC_CRT_CONTROL_16BPP      (1 << 0)
+#define SM501_DC_CRT_CONTROL_32BPP      (2 << 0)
 
-#define SM501_DC_CRT_FB_ADDR		(0x204)
-#define SM501_DC_CRT_FB_OFFSET		(0x208)
-#define SM501_DC_CRT_H_TOT		(0x20C)
-#define SM501_DC_CRT_H_SYNC		(0x210)
-#define SM501_DC_CRT_V_TOT		(0x214)
-#define SM501_DC_CRT_V_SYNC		(0x218)
-#define SM501_DC_CRT_SIGNATURE_ANALYZER	(0x21C)
-#define SM501_DC_CRT_CUR_LINE		(0x220)
-#define SM501_DC_CRT_MONITOR_DETECT	(0x224)
+#define SM501_DC_CRT_FB_ADDR            (0x204)
+#define SM501_DC_CRT_FB_OFFSET          (0x208)
+#define SM501_DC_CRT_H_TOT              (0x20C)
+#define SM501_DC_CRT_H_SYNC             (0x210)
+#define SM501_DC_CRT_V_TOT              (0x214)
+#define SM501_DC_CRT_V_SYNC             (0x218)
+#define SM501_DC_CRT_SIGNATURE_ANALYZER (0x21C)
+#define SM501_DC_CRT_CUR_LINE           (0x220)
+#define SM501_DC_CRT_MONITOR_DETECT     (0x224)
 
-#define SM501_DC_CRT_HWC_BASE		(0x230)
-#define SM501_DC_CRT_HWC_ADDR		(0x230)
-#define SM501_DC_CRT_HWC_LOC		(0x234)
-#define SM501_DC_CRT_HWC_COLOR_1_2	(0x238)
-#define SM501_DC_CRT_HWC_COLOR_3	(0x23C)
+#define SM501_DC_CRT_HWC_BASE           (0x230)
+#define SM501_DC_CRT_HWC_ADDR           (0x230)
+#define SM501_DC_CRT_HWC_LOC            (0x234)
+#define SM501_DC_CRT_HWC_COLOR_1_2      (0x238)
+#define SM501_DC_CRT_HWC_COLOR_3        (0x23C)
 
-#define SM501_DC_PANEL_PALETTE		(0x400)
+#define SM501_DC_PANEL_PALETTE          (0x400)
 
-#define SM501_DC_VIDEO_PALETTE		(0x800)
+#define SM501_DC_VIDEO_PALETTE          (0x800)
 
-#define SM501_DC_CRT_PALETTE		(0xC00)
+#define SM501_DC_CRT_PALETTE            (0xC00)
 
 /* Zoom Video port base */
-#define SM501_ZVPORT			(0x090000)
+#define SM501_ZVPORT                    (0x090000)
 
 /* AC97/I2S base */
-#define SM501_AC97			(0x0A0000)
+#define SM501_AC97                      (0x0A0000)
 
 /* 8051 micro controller base */
-#define SM501_UCONTROLLER		(0x0B0000)
+#define SM501_UCONTROLLER               (0x0B0000)
 
 /* 8051 micro controller SRAM base */
-#define SM501_UCONTROLLER_SRAM		(0x0C0000)
+#define SM501_UCONTROLLER_SRAM          (0x0C0000)
 
 /* DMA base */
-#define SM501_DMA			(0x0D0000)
+#define SM501_DMA                       (0x0D0000)
 
 /* 2d engine base */
-#define SM501_2D_ENGINE			(0x100000)
-#define SM501_2D_SOURCE			(0x00)
-#define SM501_2D_DESTINATION		(0x04)
-#define SM501_2D_DIMENSION		(0x08)
-#define SM501_2D_CONTROL		(0x0C)
-#define SM501_2D_PITCH			(0x10)
-#define SM501_2D_FOREGROUND		(0x14)
-#define SM501_2D_BACKGROUND		(0x18)
-#define SM501_2D_STRETCH		(0x1C)
-#define SM501_2D_COLOR_COMPARE		(0x20)
-#define SM501_2D_COLOR_COMPARE_MASK 	(0x24)
-#define SM501_2D_MASK			(0x28)
-#define SM501_2D_CLIP_TL		(0x2C)
-#define SM501_2D_CLIP_BR		(0x30)
-#define SM501_2D_MONO_PATTERN_LOW	(0x34)
-#define SM501_2D_MONO_PATTERN_HIGH	(0x38)
-#define SM501_2D_WINDOW_WIDTH		(0x3C)
-#define SM501_2D_SOURCE_BASE		(0x40)
-#define SM501_2D_DESTINATION_BASE	(0x44)
-#define SM501_2D_ALPHA			(0x48)
-#define SM501_2D_WRAP			(0x4C)
-#define SM501_2D_STATUS			(0x50)
-
-#define SM501_CSC_Y_SOURCE_BASE		(0xC8)
-#define SM501_CSC_CONSTANTS		(0xCC)
-#define SM501_CSC_Y_SOURCE_X		(0xD0)
-#define SM501_CSC_Y_SOURCE_Y		(0xD4)
-#define SM501_CSC_U_SOURCE_BASE		(0xD8)
-#define SM501_CSC_V_SOURCE_BASE		(0xDC)
-#define SM501_CSC_SOURCE_DIMENSION	(0xE0)
-#define SM501_CSC_SOURCE_PITCH		(0xE4)
-#define SM501_CSC_DESTINATION		(0xE8)
-#define SM501_CSC_DESTINATION_DIMENSION	(0xEC)
-#define SM501_CSC_DESTINATION_PITCH	(0xF0)
-#define SM501_CSC_SCALE_FACTOR		(0xF4)
-#define SM501_CSC_DESTINATION_BASE	(0xF8)
-#define SM501_CSC_CONTROL		(0xFC)
+#define SM501_2D_ENGINE                 (0x100000)
+#define SM501_2D_SOURCE                 (0x00)
+#define SM501_2D_DESTINATION            (0x04)
+#define SM501_2D_DIMENSION              (0x08)
+#define SM501_2D_CONTROL                (0x0C)
+#define SM501_2D_PITCH                  (0x10)
+#define SM501_2D_FOREGROUND             (0x14)
+#define SM501_2D_BACKGROUND             (0x18)
+#define SM501_2D_STRETCH                (0x1C)
+#define SM501_2D_COLOR_COMPARE          (0x20)
+#define SM501_2D_COLOR_COMPARE_MASK     (0x24)
+#define SM501_2D_MASK                   (0x28)
+#define SM501_2D_CLIP_TL                (0x2C)
+#define SM501_2D_CLIP_BR                (0x30)
+#define SM501_2D_MONO_PATTERN_LOW       (0x34)
+#define SM501_2D_MONO_PATTERN_HIGH      (0x38)
+#define SM501_2D_WINDOW_WIDTH           (0x3C)
+#define SM501_2D_SOURCE_BASE            (0x40)
+#define SM501_2D_DESTINATION_BASE       (0x44)
+#define SM501_2D_ALPHA                  (0x48)
+#define SM501_2D_WRAP                   (0x4C)
+#define SM501_2D_STATUS                 (0x50)
+
+#define SM501_CSC_Y_SOURCE_BASE         (0xC8)
+#define SM501_CSC_CONSTANTS             (0xCC)
+#define SM501_CSC_Y_SOURCE_X            (0xD0)
+#define SM501_CSC_Y_SOURCE_Y            (0xD4)
+#define SM501_CSC_U_SOURCE_BASE         (0xD8)
+#define SM501_CSC_V_SOURCE_BASE         (0xDC)
+#define SM501_CSC_SOURCE_DIMENSION      (0xE0)
+#define SM501_CSC_SOURCE_PITCH          (0xE4)
+#define SM501_CSC_DESTINATION           (0xE8)
+#define SM501_CSC_DESTINATION_DIMENSION (0xEC)
+#define SM501_CSC_DESTINATION_PITCH     (0xF0)
+#define SM501_CSC_SCALE_FACTOR          (0xF4)
+#define SM501_CSC_DESTINATION_BASE      (0xF8)
+#define SM501_CSC_CONTROL               (0xFC)
 
 /* 2d engine data port base */
-#define SM501_2D_ENGINE_DATA		(0x110000)
+#define SM501_2D_ENGINE_DATA            (0x110000)
 
 /* end of register definitions */
 
@@ -446,12 +453,12 @@
 
 /* SM501 local memory size taken from "linux/drivers/mfd/sm501.c" */
 static const uint32_t sm501_mem_local_size[] = {
-	[0]	= 4*1024*1024,
-	[1]	= 8*1024*1024,
-	[2]	= 16*1024*1024,
-	[3]	= 32*1024*1024,
-	[4]	= 64*1024*1024,
-	[5]	= 2*1024*1024,
+    [0] = 4 * M_BYTE,
+    [1] = 8 * M_BYTE,
+    [2] = 16 * M_BYTE,
+    [3] = 32 * M_BYTE,
+    [4] = 64 * M_BYTE,
+    [5] = 2 * M_BYTE,
 };
 #define get_local_mem_size(s) sm501_mem_local_size[(s)->local_mem_size_index]
 
@@ -460,10 +467,13 @@ typedef struct SM501State {
     QemuConsole *con;
 
     /* status & internal resources */
-    hwaddr base;
     uint32_t local_mem_size_index;
-    uint8_t * local_mem;
+    uint8_t *local_mem;
     MemoryRegion local_mem_region;
+    MemoryRegion mmio_region;
+    MemoryRegion system_config_region;
+    MemoryRegion disp_ctrl_region;
+    MemoryRegion twoD_engine_region;
     uint32_t last_width;
     uint32_t last_height;
 
@@ -473,6 +483,7 @@ typedef struct SM501State {
     uint32_t gpio_31_0_control;
     uint32_t gpio_63_32_control;
     uint32_t dram_control;
+    uint32_t arbitration_control;
     uint32_t irq_mask;
     uint32_t misc_timing;
     uint32_t power_mode_control;
@@ -482,7 +493,7 @@ typedef struct SM501State {
     uint32_t uart0_mcr;
     uint32_t uart0_scr;
 
-    uint8_t dc_palette[0x400 * 3];
+    uint8_t dc_palette[DC_PALETTE_ENTRIES];
 
     uint32_t dc_panel_control;
     uint32_t dc_panel_panning_control;
@@ -502,6 +513,8 @@ typedef struct SM501State {
     uint32_t dc_panel_hwc_color_1_2;
     uint32_t dc_panel_hwc_color_3;
 
+    uint32_t dc_video_control;
+
     uint32_t dc_crt_control;
     uint32_t dc_crt_fb_addr;
     uint32_t dc_crt_fb_offset;
@@ -521,13 +534,20 @@ typedef struct SM501State {
     uint32_t twoD_control;
     uint32_t twoD_pitch;
     uint32_t twoD_foreground;
+    uint32_t twoD_background;
     uint32_t twoD_stretch;
+    uint32_t twoD_color_compare;
     uint32_t twoD_color_compare_mask;
     uint32_t twoD_mask;
+    uint32_t twoD_clip_tl;
+    uint32_t twoD_clip_br;
+    uint32_t twoD_mono_pattern_low;
+    uint32_t twoD_mono_pattern_high;
     uint32_t twoD_window_width;
     uint32_t twoD_source_base;
     uint32_t twoD_destination_base;
-
+    uint32_t twoD_alpha;
+    uint32_t twoD_wrap;
 } SM501State;
 
 static uint32_t get_local_mem_size_index(uint32_t size)
@@ -536,18 +556,36 @@ static uint32_t get_local_mem_size_index(uint32_t size)
     int i, index = 0;
 
     for (i = 0; i < ARRAY_SIZE(sm501_mem_local_size); i++) {
-	uint32_t new_size = sm501_mem_local_size[i];
-	if (new_size >= size) {
-	    if (norm_size == 0 || norm_size > new_size) {
-		norm_size = new_size;
-		index = i;
-	    }
-	}
+        uint32_t new_size = sm501_mem_local_size[i];
+        if (new_size >= size) {
+            if (norm_size == 0 || norm_size > new_size) {
+                norm_size = new_size;
+                index = i;
+            }
+        }
     }
 
     return index;
 }
 
+static inline int get_width(SM501State *s, int crt)
+{
+    int width = crt ? s->dc_crt_h_total : s->dc_panel_h_total;
+    return (width & 0x00000FFF) + 1;
+}
+
+static inline int get_height(SM501State *s, int crt)
+{
+    int height = crt ? s->dc_crt_v_total : s->dc_panel_v_total;
+    return (height & 0x00000FFF) + 1;
+}
+
+static inline int get_bpp(SM501State *s, int crt)
+{
+    int bpp = crt ? s->dc_crt_control : s->dc_panel_control;
+    return 1 << (bpp & 3);
+}
+
 /**
  * Check the availability of hardware cursor.
  * @param crt  0 for PANEL, 1 for CRT.
@@ -555,17 +593,17 @@ static uint32_t get_local_mem_size_index(uint32_t size)
 static inline int is_hwc_enabled(SM501State *state, int crt)
 {
     uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr;
-    return addr & 0x80000000;
+    return addr & SM501_HWC_EN;
 }
 
 /**
  * Get the address which holds cursor pattern data.
  * @param crt  0 for PANEL, 1 for CRT.
  */
-static inline uint32_t get_hwc_address(SM501State *state, int crt)
+static inline uint8_t *get_hwc_address(SM501State *state, int crt)
 {
     uint32_t addr = crt ? state->dc_crt_hwc_addr : state->dc_panel_hwc_addr;
-    return (addr & 0x03FFFFF0)/* >> 4*/;
+    return state->local_mem + (addr & 0x03FFFFF0);
 }
 
 /**
@@ -591,53 +629,51 @@ static inline uint32_t get_hwc_x(SM501State *state, int crt)
 }
 
 /**
- * Get the cursor position in x coordinate.
+ * Get the hardware cursor palette.
  * @param crt  0 for PANEL, 1 for CRT.
- * @param index  0, 1, 2 or 3 which specifies color of corsor dot.
+ * @param palette  pointer to a [3 * 3] array to store color values in
  */
-static inline uint16_t get_hwc_color(SM501State *state, int crt, int index)
+static inline void get_hwc_palette(SM501State *state, int crt, uint8_t *palette)
 {
-    uint32_t color_reg = 0;
-    uint16_t color_565 = 0;
-
-    if (index == 0) {
-        return 0;
-    }
-
-    switch (index) {
-    case 1:
-    case 2:
-        color_reg = crt ? state->dc_crt_hwc_color_1_2
-                        : state->dc_panel_hwc_color_1_2;
-        break;
-    case 3:
-        color_reg = crt ? state->dc_crt_hwc_color_3
-                        : state->dc_panel_hwc_color_3;
-        break;
-    default:
-        printf("invalid hw cursor color.\n");
-        abort();
-    }
+    int i;
+    uint32_t color_reg;
+    uint16_t rgb565;
+
+    for (i = 0; i < 3; i++) {
+        if (i + 1 == 3) {
+            color_reg = crt ? state->dc_crt_hwc_color_3
+                            : state->dc_panel_hwc_color_3;
+        } else {
+            color_reg = crt ? state->dc_crt_hwc_color_1_2
+                            : state->dc_panel_hwc_color_1_2;
+        }
 
-    switch (index) {
-    case 1:
-    case 3:
-        color_565 = (uint16_t)(color_reg & 0xFFFF);
-        break;
-    case 2:
-        color_565 = (uint16_t)((color_reg >> 16) & 0xFFFF);
-        break;
+        if (i + 1 == 2) {
+            rgb565 = (color_reg >> 16) & 0xFFFF;
+        } else {
+            rgb565 = color_reg & 0xFFFF;
+        }
+        palette[i * 3 + 0] = (rgb565 << 3) & 0xf8; /* red */
+        palette[i * 3 + 1] = (rgb565 >> 3) & 0xfc; /* green */
+        palette[i * 3 + 2] = (rgb565 >> 8) & 0xf8; /* blue */
     }
-    return color_565;
 }
 
-static int within_hwc_y_range(SM501State *state, int y, int crt)
+static inline void hwc_invalidate(SM501State *s, int crt)
 {
-    int hwc_y = get_hwc_y(state, crt);
-    return (hwc_y <= y && y < hwc_y + SM501_HWC_HEIGHT);
+    int w = get_width(s, crt);
+    int h = get_height(s, crt);
+    int bpp = get_bpp(s, crt);
+    int start = get_hwc_y(s, crt);
+    int end = MIN(h, start + SM501_HWC_HEIGHT) + 1;
+
+    start *= w * bpp;
+    end *= w * bpp;
+
+    memory_region_set_dirty(&s->local_mem_region, start, end - start);
 }
 
-static void sm501_2d_operation(SM501State * s)
+static void sm501_2d_operation(SM501State *s)
 {
     /* obtain operation parameters */
     int operation = (s->twoD_control >> 16) & 0x1f;
@@ -653,8 +689,8 @@ static void sm501_2d_operation(SM501State * s)
     int addressing = (s->twoD_stretch >> 16) & 0xF;
 
     /* get frame buffer info */
-    uint8_t * src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF);
-    uint8_t * dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF);
+    uint8_t *src = s->local_mem + (s->twoD_source_base & 0x03FFFFFF);
+    uint8_t *dst = s->local_mem + (s->twoD_destination_base & 0x03FFFFFF);
     int src_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
     int dst_width = (s->dc_crt_h_total & 0x00000FFF) + 1;
 
@@ -671,20 +707,20 @@ static void sm501_2d_operation(SM501State * s)
 
     switch (operation) {
     case 0x00: /* copy area */
-#define COPY_AREA(_bpp, _pixel_type, rtl) {                                 \
-        int y, x, index_d, index_s;                                         \
-        for (y = 0; y < operation_height; y++) {                            \
-            for (x = 0; x < operation_width; x++) {                         \
-                if (rtl) {                                                  \
-                    index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \
-                    index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \
-                } else {                                                    \
-                    index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \
-                    index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \
-                }                                                           \
-                *(_pixel_type*)&dst[index_d] = *(_pixel_type*)&src[index_s];\
-            }                                                               \
-        }                                                                   \
+#define COPY_AREA(_bpp, _pixel_type, rtl) {                                   \
+        int y, x, index_d, index_s;                                           \
+        for (y = 0; y < operation_height; y++) {                              \
+            for (x = 0; x < operation_width; x++) {                           \
+                if (rtl) {                                                    \
+                    index_s = ((src_y - y) * src_width + src_x - x) * _bpp;   \
+                    index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp;   \
+                } else {                                                      \
+                    index_s = ((src_y + y) * src_width + src_x + x) * _bpp;   \
+                    index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp;   \
+                }                                                             \
+                *(_pixel_type *)&dst[index_d] = *(_pixel_type *)&src[index_s];\
+            }                                                                 \
+        }                                                                     \
     }
         switch (format_flags) {
         case 0:
@@ -705,7 +741,7 @@ static void sm501_2d_operation(SM501State * s)
         for (y = 0; y < operation_height; y++) {                            \
             for (x = 0; x < operation_width; x++) {                         \
                 int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp;   \
-                *(_pixel_type*)&dst[index] = (_pixel_type)color;            \
+                *(_pixel_type *)&dst[index] = (_pixel_type)color;           \
             }                                                               \
         }                                                                   \
     }
@@ -733,50 +769,53 @@ static void sm501_2d_operation(SM501State * s)
 static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
                                          unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     uint32_t ret = 0;
     SM501_DPRINTF("sm501 system config regs : read addr=%x\n", (int)addr);
 
-    switch(addr) {
+    switch (addr) {
     case SM501_SYSTEM_CONTROL:
-	ret = s->system_control;
-	break;
+        ret = s->system_control;
+        break;
     case SM501_MISC_CONTROL:
-	ret = s->misc_control;
-	break;
+        ret = s->misc_control;
+        break;
     case SM501_GPIO31_0_CONTROL:
-	ret = s->gpio_31_0_control;
-	break;
+        ret = s->gpio_31_0_control;
+        break;
     case SM501_GPIO63_32_CONTROL:
-	ret = s->gpio_63_32_control;
-	break;
+        ret = s->gpio_63_32_control;
+        break;
     case SM501_DEVICEID:
-	ret = 0x050100A0;
-	break;
+        ret = 0x050100A0;
+        break;
     case SM501_DRAM_CONTROL:
-	ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13;
-	break;
+        ret = (s->dram_control & 0x07F107C0) | s->local_mem_size_index << 13;
+        break;
+    case SM501_ARBTRTN_CONTROL:
+        ret = s->arbitration_control;
+        break;
     case SM501_IRQ_MASK:
-	ret = s->irq_mask;
-	break;
+        ret = s->irq_mask;
+        break;
     case SM501_MISC_TIMING:
-	/* TODO : simulate gate control */
-	ret = s->misc_timing;
-	break;
+        /* TODO : simulate gate control */
+        ret = s->misc_timing;
+        break;
     case SM501_CURRENT_GATE:
-	/* TODO : simulate gate control */
-	ret = 0x00021807;
-	break;
+        /* TODO : simulate gate control */
+        ret = 0x00021807;
+        break;
     case SM501_CURRENT_CLOCK:
-	ret = 0x2A1A0A09;
-	break;
+        ret = 0x2A1A0A09;
+        break;
     case SM501_POWER_MODE_CONTROL:
-	ret = s->power_mode_control;
-	break;
+        ret = s->power_mode_control;
+        break;
 
     default:
-	printf("sm501 system config : not implemented register read."
-	       " addr=%x\n", (int)addr);
+        printf("sm501 system config : not implemented register read."
+               " addr=%x\n", (int)addr);
         abort();
     }
 
@@ -786,47 +825,50 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr,
 static void sm501_system_config_write(void *opaque, hwaddr addr,
                                       uint64_t value, unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     SM501_DPRINTF("sm501 system config regs : write addr=%x, val=%x\n",
-		  (uint32_t)addr, (uint32_t)value);
+                  (uint32_t)addr, (uint32_t)value);
 
-    switch(addr) {
+    switch (addr) {
     case SM501_SYSTEM_CONTROL:
-	s->system_control = value & 0xE300B8F7;
-	break;
+        s->system_control = value & 0xE300B8F7;
+        break;
     case SM501_MISC_CONTROL:
-	s->misc_control = value & 0xFF7FFF20;
-	break;
+        s->misc_control = value & 0xFF7FFF20;
+        break;
     case SM501_GPIO31_0_CONTROL:
-	s->gpio_31_0_control = value;
-	break;
+        s->gpio_31_0_control = value;
+        break;
     case SM501_GPIO63_32_CONTROL:
-	s->gpio_63_32_control = value;
-	break;
+        s->gpio_63_32_control = value;
+        break;
     case SM501_DRAM_CONTROL:
-	s->local_mem_size_index = (value >> 13) & 0x7;
-	/* rODO : check validity of size change */
-	s->dram_control |=  value & 0x7FFFFFC3;
-	break;
+        s->local_mem_size_index = (value >> 13) & 0x7;
+        /* TODO : check validity of size change */
+        s->dram_control |=  value & 0x7FFFFFC3;
+        break;
+    case SM501_ARBTRTN_CONTROL:
+        s->arbitration_control =  value & 0x37777777;
+        break;
     case SM501_IRQ_MASK:
-	s->irq_mask = value;
-	break;
+        s->irq_mask = value;
+        break;
     case SM501_MISC_TIMING:
-	s->misc_timing = value & 0xF31F1FFF;
-	break;
+        s->misc_timing = value & 0xF31F1FFF;
+        break;
     case SM501_POWER_MODE_0_GATE:
     case SM501_POWER_MODE_1_GATE:
     case SM501_POWER_MODE_0_CLOCK:
     case SM501_POWER_MODE_1_CLOCK:
-	/* TODO : simulate gate & clock control */
-	break;
+        /* TODO : simulate gate & clock control */
+        break;
     case SM501_POWER_MODE_CONTROL:
-	s->power_mode_control = value & 0x00000003;
-	break;
+        s->power_mode_control = value & 0x00000003;
+        break;
 
     default:
-	printf("sm501 system config : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (uint32_t)value);
+        printf("sm501 system config : not implemented register write."
+               " addr=%x, val=%x\n", (int)addr, (uint32_t)value);
         abort();
     }
 }
@@ -838,124 +880,128 @@ static const MemoryRegionOps sm501_system_config_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint32_t sm501_palette_read(void *opaque, hwaddr addr)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     SM501_DPRINTF("sm501 palette read addr=%x\n", (int)addr);
 
     /* TODO : consider BYTE/WORD access */
     /* TODO : consider endian */
 
     assert(range_covers_byte(0, 0x400 * 3, addr));
-    return *(uint32_t*)&s->dc_palette[addr];
+    return *(uint32_t *)&s->dc_palette[addr];
 }
 
-static void sm501_palette_write(void *opaque,
-				hwaddr addr, uint32_t value)
+static void sm501_palette_write(void *opaque, hwaddr addr,
+                                uint32_t value)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     SM501_DPRINTF("sm501 palette write addr=%x, val=%x\n",
-		  (int)addr, value);
+                  (int)addr, value);
 
     /* TODO : consider BYTE/WORD access */
     /* TODO : consider endian */
 
     assert(range_covers_byte(0, 0x400 * 3, addr));
-    *(uint32_t*)&s->dc_palette[addr] = value;
+    *(uint32_t *)&s->dc_palette[addr] = value;
 }
 
 static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
                                      unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     uint32_t ret = 0;
     SM501_DPRINTF("sm501 disp ctrl regs : read addr=%x\n", (int)addr);
 
-    switch(addr) {
+    switch (addr) {
 
     case SM501_DC_PANEL_CONTROL:
-	ret = s->dc_panel_control;
-	break;
+        ret = s->dc_panel_control;
+        break;
     case SM501_DC_PANEL_PANNING_CONTROL:
-	ret = s->dc_panel_panning_control;
-	break;
+        ret = s->dc_panel_panning_control;
+        break;
     case SM501_DC_PANEL_FB_ADDR:
-	ret = s->dc_panel_fb_addr;
-	break;
+        ret = s->dc_panel_fb_addr;
+        break;
     case SM501_DC_PANEL_FB_OFFSET:
-	ret = s->dc_panel_fb_offset;
-	break;
+        ret = s->dc_panel_fb_offset;
+        break;
     case SM501_DC_PANEL_FB_WIDTH:
-	ret = s->dc_panel_fb_width;
-	break;
+        ret = s->dc_panel_fb_width;
+        break;
     case SM501_DC_PANEL_FB_HEIGHT:
-	ret = s->dc_panel_fb_height;
-	break;
+        ret = s->dc_panel_fb_height;
+        break;
     case SM501_DC_PANEL_TL_LOC:
-	ret = s->dc_panel_tl_location;
-	break;
+        ret = s->dc_panel_tl_location;
+        break;
     case SM501_DC_PANEL_BR_LOC:
-	ret = s->dc_panel_br_location;
-	break;
+        ret = s->dc_panel_br_location;
+        break;
 
     case SM501_DC_PANEL_H_TOT:
-	ret = s->dc_panel_h_total;
-	break;
+        ret = s->dc_panel_h_total;
+        break;
     case SM501_DC_PANEL_H_SYNC:
-	ret = s->dc_panel_h_sync;
-	break;
+        ret = s->dc_panel_h_sync;
+        break;
     case SM501_DC_PANEL_V_TOT:
-	ret = s->dc_panel_v_total;
-	break;
+        ret = s->dc_panel_v_total;
+        break;
     case SM501_DC_PANEL_V_SYNC:
-	ret = s->dc_panel_v_sync;
-	break;
+        ret = s->dc_panel_v_sync;
+        break;
+
+    case SM501_DC_VIDEO_CONTROL:
+        ret = s->dc_video_control;
+        break;
 
     case SM501_DC_CRT_CONTROL:
-	ret = s->dc_crt_control;
-	break;
+        ret = s->dc_crt_control;
+        break;
     case SM501_DC_CRT_FB_ADDR:
-	ret = s->dc_crt_fb_addr;
-	break;
+        ret = s->dc_crt_fb_addr;
+        break;
     case SM501_DC_CRT_FB_OFFSET:
-	ret = s->dc_crt_fb_offset;
-	break;
+        ret = s->dc_crt_fb_offset;
+        break;
     case SM501_DC_CRT_H_TOT:
-	ret = s->dc_crt_h_total;
-	break;
+        ret = s->dc_crt_h_total;
+        break;
     case SM501_DC_CRT_H_SYNC:
-	ret = s->dc_crt_h_sync;
-	break;
+        ret = s->dc_crt_h_sync;
+        break;
     case SM501_DC_CRT_V_TOT:
-	ret = s->dc_crt_v_total;
-	break;
+        ret = s->dc_crt_v_total;
+        break;
     case SM501_DC_CRT_V_SYNC:
-	ret = s->dc_crt_v_sync;
-	break;
+        ret = s->dc_crt_v_sync;
+        break;
 
     case SM501_DC_CRT_HWC_ADDR:
-	ret = s->dc_crt_hwc_addr;
-	break;
+        ret = s->dc_crt_hwc_addr;
+        break;
     case SM501_DC_CRT_HWC_LOC:
-	ret = s->dc_crt_hwc_location;
-	break;
+        ret = s->dc_crt_hwc_location;
+        break;
     case SM501_DC_CRT_HWC_COLOR_1_2:
-	ret = s->dc_crt_hwc_color_1_2;
-	break;
+        ret = s->dc_crt_hwc_color_1_2;
+        break;
     case SM501_DC_CRT_HWC_COLOR_3:
-	ret = s->dc_crt_hwc_color_3;
-	break;
+        ret = s->dc_crt_hwc_color_3;
+        break;
 
-    case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400*3 - 4:
+    case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400 * 3 - 4:
         ret = sm501_palette_read(opaque, addr - SM501_DC_PANEL_PALETTE);
         break;
 
     default:
-	printf("sm501 disp ctrl : not implemented register read."
-	       " addr=%x\n", (int)addr);
+        printf("sm501 disp ctrl : not implemented register read."
+               " addr=%x\n", (int)addr);
         abort();
     }
 
@@ -965,104 +1011,124 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr,
 static void sm501_disp_ctrl_write(void *opaque, hwaddr addr,
                                   uint64_t value, unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     SM501_DPRINTF("sm501 disp ctrl regs : write addr=%x, val=%x\n",
-		  (unsigned)addr, (unsigned)value);
+                  (unsigned)addr, (unsigned)value);
 
-    switch(addr) {
+    switch (addr) {
     case SM501_DC_PANEL_CONTROL:
-	s->dc_panel_control = value & 0x0FFF73FF;
-	break;
+        s->dc_panel_control = value & 0x0FFF73FF;
+        break;
     case SM501_DC_PANEL_PANNING_CONTROL:
-	s->dc_panel_panning_control = value & 0xFF3FFF3F;
-	break;
+        s->dc_panel_panning_control = value & 0xFF3FFF3F;
+        break;
     case SM501_DC_PANEL_FB_ADDR:
-	s->dc_panel_fb_addr = value & 0x8FFFFFF0;
-	break;
+        s->dc_panel_fb_addr = value & 0x8FFFFFF0;
+        break;
     case SM501_DC_PANEL_FB_OFFSET:
-	s->dc_panel_fb_offset = value & 0x3FF03FF0;
-	break;
+        s->dc_panel_fb_offset = value & 0x3FF03FF0;
+        break;
     case SM501_DC_PANEL_FB_WIDTH:
-	s->dc_panel_fb_width = value & 0x0FFF0FFF;
-	break;
+        s->dc_panel_fb_width = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_PANEL_FB_HEIGHT:
-	s->dc_panel_fb_height = value & 0x0FFF0FFF;
-	break;
+        s->dc_panel_fb_height = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_PANEL_TL_LOC:
-	s->dc_panel_tl_location = value & 0x07FF07FF;
-	break;
+        s->dc_panel_tl_location = value & 0x07FF07FF;
+        break;
     case SM501_DC_PANEL_BR_LOC:
-	s->dc_panel_br_location = value & 0x07FF07FF;
-	break;
+        s->dc_panel_br_location = value & 0x07FF07FF;
+        break;
 
     case SM501_DC_PANEL_H_TOT:
-	s->dc_panel_h_total = value & 0x0FFF0FFF;
-	break;
+        s->dc_panel_h_total = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_PANEL_H_SYNC:
-	s->dc_panel_h_sync = value & 0x00FF0FFF;
-	break;
+        s->dc_panel_h_sync = value & 0x00FF0FFF;
+        break;
     case SM501_DC_PANEL_V_TOT:
-	s->dc_panel_v_total = value & 0x0FFF0FFF;
-	break;
+        s->dc_panel_v_total = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_PANEL_V_SYNC:
-	s->dc_panel_v_sync = value & 0x003F0FFF;
-	break;
+        s->dc_panel_v_sync = value & 0x003F0FFF;
+        break;
 
     case SM501_DC_PANEL_HWC_ADDR:
-	s->dc_panel_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_panel_hwc_addr) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_addr = value;
+        }
+        break;
     case SM501_DC_PANEL_HWC_LOC:
-	s->dc_panel_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_panel_hwc_location) {
+            hwc_invalidate(s, 0);
+            s->dc_panel_hwc_location = value;
+        }
+        break;
     case SM501_DC_PANEL_HWC_COLOR_1_2:
-	s->dc_panel_hwc_color_1_2 = value;
-	break;
+        s->dc_panel_hwc_color_1_2 = value;
+        break;
     case SM501_DC_PANEL_HWC_COLOR_3:
-	s->dc_panel_hwc_color_3 = value & 0x0000FFFF;
-	break;
+        s->dc_panel_hwc_color_3 = value & 0x0000FFFF;
+        break;
+
+    case SM501_DC_VIDEO_CONTROL:
+        s->dc_video_control = value & 0x00037FFF;
+        break;
 
     case SM501_DC_CRT_CONTROL:
-	s->dc_crt_control = value & 0x0003FFFF;
-	break;
+        s->dc_crt_control = value & 0x0003FFFF;
+        break;
     case SM501_DC_CRT_FB_ADDR:
-	s->dc_crt_fb_addr = value & 0x8FFFFFF0;
-	break;
+        s->dc_crt_fb_addr = value & 0x8FFFFFF0;
+        break;
     case SM501_DC_CRT_FB_OFFSET:
-	s->dc_crt_fb_offset = value & 0x3FF03FF0;
-	break;
+        s->dc_crt_fb_offset = value & 0x3FF03FF0;
+        break;
     case SM501_DC_CRT_H_TOT:
-	s->dc_crt_h_total = value & 0x0FFF0FFF;
-	break;
+        s->dc_crt_h_total = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_CRT_H_SYNC:
-	s->dc_crt_h_sync = value & 0x00FF0FFF;
-	break;
+        s->dc_crt_h_sync = value & 0x00FF0FFF;
+        break;
     case SM501_DC_CRT_V_TOT:
-	s->dc_crt_v_total = value & 0x0FFF0FFF;
-	break;
+        s->dc_crt_v_total = value & 0x0FFF0FFF;
+        break;
     case SM501_DC_CRT_V_SYNC:
-	s->dc_crt_v_sync = value & 0x003F0FFF;
-	break;
+        s->dc_crt_v_sync = value & 0x003F0FFF;
+        break;
 
     case SM501_DC_CRT_HWC_ADDR:
-	s->dc_crt_hwc_addr = value & 0x8FFFFFF0;
-	break;
+        value &= 0x8FFFFFF0;
+        if (value != s->dc_crt_hwc_addr) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_addr = value;
+        }
+        break;
     case SM501_DC_CRT_HWC_LOC:
-	s->dc_crt_hwc_location = value & 0x0FFF0FFF;
-	break;
+        value &= 0x0FFF0FFF;
+        if (value != s->dc_crt_hwc_location) {
+            hwc_invalidate(s, 1);
+            s->dc_crt_hwc_location = value;
+        }
+        break;
     case SM501_DC_CRT_HWC_COLOR_1_2:
-	s->dc_crt_hwc_color_1_2 = value;
-	break;
+        s->dc_crt_hwc_color_1_2 = value;
+        break;
     case SM501_DC_CRT_HWC_COLOR_3:
-	s->dc_crt_hwc_color_3 = value & 0x0000FFFF;
-	break;
+        s->dc_crt_hwc_color_3 = value & 0x0000FFFF;
+        break;
 
-    case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400*3 - 4:
+    case SM501_DC_PANEL_PALETTE ... SM501_DC_PANEL_PALETTE + 0x400 * 3 - 4:
         sm501_palette_write(opaque, addr - SM501_DC_PANEL_PALETTE, value);
         break;
 
     default:
-	printf("sm501 disp ctrl : not implemented register write."
-	       " addr=%x, val=%x\n", (int)addr, (unsigned)value);
+        printf("sm501 disp ctrl : not implemented register write."
+               " addr=%x, val=%x\n", (int)addr, (unsigned)value);
         abort();
     }
 }
@@ -1074,20 +1140,80 @@ static const MemoryRegionOps sm501_disp_ctrl_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
                                      unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     uint32_t ret = 0;
     SM501_DPRINTF("sm501 2d engine regs : read addr=%x\n", (int)addr);
 
-    switch(addr) {
+    switch (addr) {
+    case SM501_2D_SOURCE:
+        ret = s->twoD_source;
+        break;
+    case SM501_2D_DESTINATION:
+        ret = s->twoD_destination;
+        break;
+    case SM501_2D_DIMENSION:
+        ret = s->twoD_dimension;
+        break;
+    case SM501_2D_CONTROL:
+        ret = s->twoD_control;
+        break;
+    case SM501_2D_PITCH:
+        ret = s->twoD_pitch;
+        break;
+    case SM501_2D_FOREGROUND:
+        ret = s->twoD_foreground;
+        break;
+    case SM501_2D_BACKGROUND:
+        ret = s->twoD_background;
+        break;
+    case SM501_2D_STRETCH:
+        ret = s->twoD_stretch;
+        break;
+    case SM501_2D_COLOR_COMPARE:
+        ret = s->twoD_color_compare;
+        break;
+    case SM501_2D_COLOR_COMPARE_MASK:
+        ret = s->twoD_color_compare_mask;
+        break;
+    case SM501_2D_MASK:
+        ret = s->twoD_mask;
+        break;
+    case SM501_2D_CLIP_TL:
+        ret = s->twoD_clip_tl;
+        break;
+    case SM501_2D_CLIP_BR:
+        ret = s->twoD_clip_br;
+        break;
+    case SM501_2D_MONO_PATTERN_LOW:
+        ret = s->twoD_mono_pattern_low;
+        break;
+    case SM501_2D_MONO_PATTERN_HIGH:
+        ret = s->twoD_mono_pattern_high;
+        break;
+    case SM501_2D_WINDOW_WIDTH:
+        ret = s->twoD_window_width;
+        break;
     case SM501_2D_SOURCE_BASE:
         ret = s->twoD_source_base;
         break;
+    case SM501_2D_DESTINATION_BASE:
+        ret = s->twoD_destination_base;
+        break;
+    case SM501_2D_ALPHA:
+        ret = s->twoD_alpha;
+        break;
+    case SM501_2D_WRAP:
+        ret = s->twoD_wrap;
+        break;
+    case SM501_2D_STATUS:
+        ret = 0; /* Should return interrupt status */
+        break;
     default:
         printf("sm501 disp ctrl : not implemented register read."
                " addr=%x\n", (int)addr);
@@ -1100,11 +1226,11 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr,
 static void sm501_2d_engine_write(void *opaque, hwaddr addr,
                                   uint64_t value, unsigned size)
 {
-    SM501State * s = (SM501State *)opaque;
+    SM501State *s = (SM501State *)opaque;
     SM501_DPRINTF("sm501 2d engine regs : write addr=%x, val=%x\n",
                   (unsigned)addr, (unsigned)value);
 
-    switch(addr) {
+    switch (addr) {
     case SM501_2D_SOURCE:
         s->twoD_source = value;
         break;
@@ -1130,15 +1256,33 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr,
     case SM501_2D_FOREGROUND:
         s->twoD_foreground = value;
         break;
+    case SM501_2D_BACKGROUND:
+        s->twoD_background = value;
+        break;
     case SM501_2D_STRETCH:
         s->twoD_stretch = value;
         break;
+    case SM501_2D_COLOR_COMPARE:
+        s->twoD_color_compare = value;
+        break;
     case SM501_2D_COLOR_COMPARE_MASK:
         s->twoD_color_compare_mask = value;
         break;
     case SM501_2D_MASK:
         s->twoD_mask = value;
         break;
+    case SM501_2D_CLIP_TL:
+        s->twoD_clip_tl = value;
+        break;
+    case SM501_2D_CLIP_BR:
+        s->twoD_clip_br = value;
+        break;
+    case SM501_2D_MONO_PATTERN_LOW:
+        s->twoD_mono_pattern_low = value;
+        break;
+    case SM501_2D_MONO_PATTERN_HIGH:
+        s->twoD_mono_pattern_high = value;
+        break;
     case SM501_2D_WINDOW_WIDTH:
         s->twoD_window_width = value;
         break;
@@ -1148,6 +1292,15 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr,
     case SM501_2D_DESTINATION_BASE:
         s->twoD_destination_base = value;
         break;
+    case SM501_2D_ALPHA:
+        s->twoD_alpha = value;
+        break;
+    case SM501_2D_WRAP:
+        s->twoD_wrap = value;
+        break;
+    case SM501_2D_STATUS:
+        /* ignored, writing 0 should clear interrupt status */
+        break;
     default:
         printf("sm501 2d engine : not implemented register write."
                " addr=%x, val=%x\n", (int)addr, (unsigned)value);
@@ -1162,16 +1315,17 @@ static const MemoryRegionOps sm501_2d_engine_ops = {
         .min_access_size = 4,
         .max_access_size = 4,
     },
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 /* draw line functions for all console modes */
 
 typedef void draw_line_func(uint8_t *d, const uint8_t *s,
-			    int width, const uint32_t *pal);
+                            int width, const uint32_t *pal);
 
-typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette,
-                                int c_y, uint8_t *d, int width);
+typedef void draw_hwc_line_func(uint8_t *d, const uint8_t *s,
+                                int width, const uint8_t *palette,
+                                int c_x, int c_y);
 
 #define DEPTH 8
 #include "sm501_template.h"
@@ -1197,7 +1351,7 @@ typedef void draw_hwc_line_func(SM501State * s, int crt, uint8_t * palette,
 #define DEPTH 32
 #include "sm501_template.h"
 
-static draw_line_func * draw_line8_funcs[] = {
+static draw_line_func *draw_line8_funcs[] = {
     draw_line8_8,
     draw_line8_15,
     draw_line8_16,
@@ -1207,7 +1361,7 @@ static draw_line_func * draw_line8_funcs[] = {
     draw_line8_16bgr,
 };
 
-static draw_line_func * draw_line16_funcs[] = {
+static draw_line_func *draw_line16_funcs[] = {
     draw_line16_8,
     draw_line16_15,
     draw_line16_16,
@@ -1217,7 +1371,7 @@ static draw_line_func * draw_line16_funcs[] = {
     draw_line16_16bgr,
 };
 
-static draw_line_func * draw_line32_funcs[] = {
+static draw_line_func *draw_line32_funcs[] = {
     draw_line32_8,
     draw_line32_15,
     draw_line32_16,
@@ -1227,7 +1381,7 @@ static draw_line_func * draw_line32_funcs[] = {
     draw_line32_16bgr,
 };
 
-static draw_hwc_line_func * draw_hwc_line_funcs[] = {
+static draw_hwc_line_func *draw_hwc_line_funcs[] = {
     draw_hwc_line_8,
     draw_hwc_line_15,
     draw_hwc_line_16,
@@ -1242,7 +1396,7 @@ static inline int get_depth_index(DisplaySurface *surface)
     switch (surface_bits_per_pixel(surface)) {
     default:
     case 8:
-	return 0;
+        return 0;
     case 15:
         return 1;
     case 16:
@@ -1256,203 +1410,459 @@ static inline int get_depth_index(DisplaySurface *surface)
     }
 }
 
-static void sm501_draw_crt(SM501State * s)
+static void sm501_update_display(void *opaque)
 {
+    SM501State *s = (SM501State *)opaque;
     DisplaySurface *surface = qemu_console_surface(s->con);
-    int y;
-    int width = (s->dc_crt_h_total & 0x00000FFF) + 1;
-    int height = (s->dc_crt_v_total & 0x00000FFF) + 1;
-
-    uint8_t  * src = s->local_mem;
-    int src_bpp = 0;
+    int y, c_x = 0, c_y = 0;
+    int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 1 : 0;
+    int width = get_width(s, crt);
+    int height = get_height(s, crt);
+    int src_bpp = get_bpp(s, crt);
     int dst_bpp = surface_bytes_per_pixel(surface);
-    uint32_t * palette = (uint32_t *)&s->dc_palette[SM501_DC_CRT_PALETTE
-						    - SM501_DC_PANEL_PALETTE];
-    uint8_t hwc_palette[3 * 3];
-    int ds_depth_index = get_depth_index(surface);
-    draw_line_func * draw_line = NULL;
-    draw_hwc_line_func * draw_hwc_line = NULL;
+    int dst_depth_index = get_depth_index(surface);
+    draw_line_func *draw_line = NULL;
+    draw_hwc_line_func *draw_hwc_line = NULL;
     int full_update = 0;
     int y_start = -1;
     ram_addr_t page_min = ~0l;
     ram_addr_t page_max = 0l;
-    ram_addr_t offset = 0;
+    ram_addr_t offset;
+    uint32_t *palette;
+    uint8_t hwc_palette[3 * 3];
+    uint8_t *hwc_src = NULL;
+
+    if (!((crt ? s->dc_crt_control : s->dc_panel_control)
+          & SM501_DC_CRT_CONTROL_ENABLE)) {
+        return;
+    }
+
+    palette = (uint32_t *)(crt ? &s->dc_palette[SM501_DC_CRT_PALETTE -
+                                                SM501_DC_PANEL_PALETTE]
+                               : &s->dc_palette[0]);
 
     /* choose draw_line function */
-    switch (s->dc_crt_control & 3) {
-    case SM501_DC_CRT_CONTROL_8BPP:
-	src_bpp = 1;
-	draw_line = draw_line8_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_16BPP:
-	src_bpp = 2;
-	draw_line = draw_line16_funcs[ds_depth_index];
-	break;
-    case SM501_DC_CRT_CONTROL_32BPP:
-	src_bpp = 4;
-	draw_line = draw_line32_funcs[ds_depth_index];
-	break;
+    switch (src_bpp) {
+    case 1:
+        draw_line = draw_line8_funcs[dst_depth_index];
+        break;
+    case 2:
+        draw_line = draw_line16_funcs[dst_depth_index];
+        break;
+    case 4:
+        draw_line = draw_line32_funcs[dst_depth_index];
+        break;
     default:
-	printf("sm501 draw crt : invalid DC_CRT_CONTROL=%x.\n",
-	       s->dc_crt_control);
+        printf("sm501 update display : invalid control register value.\n");
         abort();
-	break;
+        break;
     }
 
     /* set up to draw hardware cursor */
-    if (is_hwc_enabled(s, 1)) {
-        int i;
-
-        /* get cursor palette */
-        for (i = 0; i < 3; i++) {
-            uint16_t rgb565 = get_hwc_color(s, 1, i + 1);
-            hwc_palette[i * 3 + 0] = (rgb565 & 0xf800) >> 8; /* red */
-            hwc_palette[i * 3 + 1] = (rgb565 & 0x07e0) >> 3; /* green */
-            hwc_palette[i * 3 + 2] = (rgb565 & 0x001f) << 3; /* blue */
-        }
-
+    if (is_hwc_enabled(s, crt)) {
         /* choose cursor draw line function */
-        draw_hwc_line = draw_hwc_line_funcs[ds_depth_index];
+        draw_hwc_line = draw_hwc_line_funcs[dst_depth_index];
+        hwc_src = get_hwc_address(s, crt);
+        c_x = get_hwc_x(s, crt);
+        c_y = get_hwc_y(s, crt);
+        get_hwc_palette(s, crt, hwc_palette);
     }
 
     /* adjust console size */
     if (s->last_width != width || s->last_height != height) {
         qemu_console_resize(s->con, width, height);
         surface = qemu_console_surface(s->con);
-	s->last_width = width;
-	s->last_height = height;
-	full_update = 1;
+        s->last_width = width;
+        s->last_height = height;
+        full_update = 1;
     }
 
     /* draw each line according to conditions */
     memory_region_sync_dirty_bitmap(&s->local_mem_region);
-    for (y = 0; y < height; y++) {
-	int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0;
-	int update = full_update || update_hwc;
+    for (y = 0, offset = 0; y < height; y++, offset += width * src_bpp) {
+        int update, update_hwc;
         ram_addr_t page0 = offset;
         ram_addr_t page1 = offset + width * src_bpp - 1;
 
-	/* check dirty flags for each line */
-        update = memory_region_get_dirty(&s->local_mem_region, page0,
-                                         page1 - page0, DIRTY_MEMORY_VGA);
+        /* check if hardware cursor is enabled and we're within its range */
+        update_hwc = draw_hwc_line && c_y <= y && y < c_y + SM501_HWC_HEIGHT;
+        update = full_update || update_hwc;
+        /* check dirty flags for each line */
+        update |= memory_region_get_dirty(&s->local_mem_region, page0,
+                                          page1 - page0, DIRTY_MEMORY_VGA);
 
-	/* draw line and change status */
-	if (update) {
+        /* draw line and change status */
+        if (update) {
             uint8_t *d = surface_data(surface);
             d +=  y * width * dst_bpp;
 
             /* draw graphics layer */
-            draw_line(d, src, width, palette);
+            draw_line(d, s->local_mem + offset, width, palette);
 
-            /* draw haredware cursor */
+            /* draw hardware cursor */
             if (update_hwc) {
-                draw_hwc_line(s, 1, hwc_palette, y - get_hwc_y(s, 1), d, width);
+                draw_hwc_line(d, hwc_src, width, hwc_palette, c_x, y - c_y);
             }
 
-	    if (y_start < 0)
-		y_start = y;
-	    if (page0 < page_min)
-		page_min = page0;
-	    if (page1 > page_max)
-		page_max = page1;
-	} else {
-	    if (y_start >= 0) {
-		/* flush to display */
+            if (y_start < 0) {
+                y_start = y;
+            }
+            if (page0 < page_min) {
+                page_min = page0;
+            }
+            if (page1 > page_max) {
+                page_max = page1;
+            }
+        } else {
+            if (y_start >= 0) {
+                /* flush to display */
                 dpy_gfx_update(s->con, 0, y_start, width, y - y_start);
-		y_start = -1;
-	    }
-	}
-
-	src += width * src_bpp;
-	offset += width * src_bpp;
+                y_start = -1;
+            }
+        }
     }
 
     /* complete flush to display */
-    if (y_start >= 0)
+    if (y_start >= 0) {
         dpy_gfx_update(s->con, 0, y_start, width, y - y_start);
+    }
 
     /* clear dirty flags */
     if (page_min != ~0l) {
-	memory_region_reset_dirty(&s->local_mem_region,
+        memory_region_reset_dirty(&s->local_mem_region,
                                   page_min, page_max + TARGET_PAGE_SIZE,
                                   DIRTY_MEMORY_VGA);
     }
 }
 
-static void sm501_update_display(void *opaque)
-{
-    SM501State * s = (SM501State *)opaque;
-
-    if (s->dc_crt_control & SM501_DC_CRT_CONTROL_ENABLE)
-	sm501_draw_crt(s);
-}
-
 static const GraphicHwOps sm501_ops = {
     .gfx_update  = sm501_update_display,
 };
 
-void sm501_init(MemoryRegion *address_space_mem, uint32_t base,
-                uint32_t local_mem_bytes, qemu_irq irq, Chardev *chr)
+static void sm501_reset(SM501State *s)
 {
-    SM501State * s;
-    DeviceState *dev;
-    MemoryRegion *sm501_system_config = g_new(MemoryRegion, 1);
-    MemoryRegion *sm501_disp_ctrl = g_new(MemoryRegion, 1);
-    MemoryRegion *sm501_2d_engine = g_new(MemoryRegion, 1);
-
-    /* allocate management data region */
-    s = (SM501State *)g_malloc0(sizeof(SM501State));
-    s->base = base;
-    s->local_mem_size_index
-	= get_local_mem_size_index(local_mem_bytes);
-    SM501_DPRINTF("local mem size=%x. index=%d\n", get_local_mem_size(s),
-		  s->local_mem_size_index);
-    s->system_control = 0x00100000;
-    s->misc_control = 0x00001000; /* assumes SH, active=low */
-    s->dc_panel_control = 0x00010000;
+    s->system_control = 0x00100000; /* 2D engine FIFO empty */
+    /* Bits 17 (SH), 7 (CDR), 6:5 (Test), 2:0 (Bus) are all supposed
+     * to be determined at reset by GPIO lines which set config bits.
+     * We hardwire them:
+     *  SH = 0 : Hitachi Ready Polarity == Active Low
+     *  CDR = 0 : do not reset clock divider
+     *  TEST = 0 : Normal mode (not testing the silicon)
+     *  BUS = 0 : Hitachi SH3/SH4
+     */
+    s->misc_control = SM501_MISC_DAC_POWER;
+    s->gpio_31_0_control = 0;
+    s->gpio_63_32_control = 0;
+    s->dram_control = 0;
+    s->arbitration_control = 0x05146732;
+    s->irq_mask = 0;
+    s->misc_timing = 0;
+    s->power_mode_control = 0;
+    s->dc_panel_control = 0x00010000; /* FIFO level 3 */
+    s->dc_video_control = 0;
     s->dc_crt_control = 0x00010000;
+    s->twoD_source = 0;
+    s->twoD_destination = 0;
+    s->twoD_dimension = 0;
+    s->twoD_control = 0;
+    s->twoD_pitch = 0;
+    s->twoD_foreground = 0;
+    s->twoD_background = 0;
+    s->twoD_stretch = 0;
+    s->twoD_color_compare = 0;
+    s->twoD_color_compare_mask = 0;
+    s->twoD_mask = 0;
+    s->twoD_clip_tl = 0;
+    s->twoD_clip_br = 0;
+    s->twoD_mono_pattern_low = 0;
+    s->twoD_mono_pattern_high = 0;
+    s->twoD_window_width = 0;
+    s->twoD_source_base = 0;
+    s->twoD_destination_base = 0;
+    s->twoD_alpha = 0;
+    s->twoD_wrap = 0;
+}
+
+static void sm501_init(SM501State *s, DeviceState *dev,
+                       uint32_t local_mem_bytes)
+{
+    s->local_mem_size_index = get_local_mem_size_index(local_mem_bytes);
+    SM501_DPRINTF("sm501 local mem size=%x. index=%d\n", get_local_mem_size(s),
+                  s->local_mem_size_index);
 
-    /* allocate local memory */
-    memory_region_init_ram(&s->local_mem_region, NULL, "sm501.local",
-                           local_mem_bytes, &error_fatal);
+    /* local memory */
+    memory_region_init_ram(&s->local_mem_region, OBJECT(dev), "sm501.local",
+                           get_local_mem_size(s), &error_fatal);
     vmstate_register_ram_global(&s->local_mem_region);
     memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA);
     s->local_mem = memory_region_get_ram_ptr(&s->local_mem_region);
-    memory_region_add_subregion(address_space_mem, base, &s->local_mem_region);
 
-    /* map mmio */
-    memory_region_init_io(sm501_system_config, NULL, &sm501_system_config_ops, s,
+    /* mmio */
+    memory_region_init(&s->mmio_region, OBJECT(dev), "sm501.mmio", MMIO_SIZE);
+    memory_region_init_io(&s->system_config_region, OBJECT(dev),
+                          &sm501_system_config_ops, s,
                           "sm501-system-config", 0x6c);
-    memory_region_add_subregion(address_space_mem, base + MMIO_BASE_OFFSET,
-                                sm501_system_config);
-    memory_region_init_io(sm501_disp_ctrl, NULL, &sm501_disp_ctrl_ops, s,
+    memory_region_add_subregion(&s->mmio_region, SM501_SYS_CONFIG,
+                                &s->system_config_region);
+    memory_region_init_io(&s->disp_ctrl_region, OBJECT(dev),
+                          &sm501_disp_ctrl_ops, s,
                           "sm501-disp-ctrl", 0x1000);
-    memory_region_add_subregion(address_space_mem,
-                                base + MMIO_BASE_OFFSET + SM501_DC,
-                                sm501_disp_ctrl);
-    memory_region_init_io(sm501_2d_engine, NULL, &sm501_2d_engine_ops, s,
+    memory_region_add_subregion(&s->mmio_region, SM501_DC,
+                                &s->disp_ctrl_region);
+    memory_region_init_io(&s->twoD_engine_region, OBJECT(dev),
+                          &sm501_2d_engine_ops, s,
                           "sm501-2d-engine", 0x54);
-    memory_region_add_subregion(address_space_mem,
-                                base + MMIO_BASE_OFFSET + SM501_2D_ENGINE,
-                                sm501_2d_engine);
+    memory_region_add_subregion(&s->mmio_region, SM501_2D_ENGINE,
+                                &s->twoD_engine_region);
+
+    /* create qemu graphic console */
+    s->con = graphic_console_init(DEVICE(dev), 0, &sm501_ops, s);
+}
+
+static const VMStateDescription vmstate_sm501_state = {
+    .name = "sm501-state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(local_mem_size_index, SM501State),
+        VMSTATE_UINT32(system_control, SM501State),
+        VMSTATE_UINT32(misc_control, SM501State),
+        VMSTATE_UINT32(gpio_31_0_control, SM501State),
+        VMSTATE_UINT32(gpio_63_32_control, SM501State),
+        VMSTATE_UINT32(dram_control, SM501State),
+        VMSTATE_UINT32(arbitration_control, SM501State),
+        VMSTATE_UINT32(irq_mask, SM501State),
+        VMSTATE_UINT32(misc_timing, SM501State),
+        VMSTATE_UINT32(power_mode_control, SM501State),
+        VMSTATE_UINT32(uart0_ier, SM501State),
+        VMSTATE_UINT32(uart0_lcr, SM501State),
+        VMSTATE_UINT32(uart0_mcr, SM501State),
+        VMSTATE_UINT32(uart0_scr, SM501State),
+        VMSTATE_UINT8_ARRAY(dc_palette, SM501State, DC_PALETTE_ENTRIES),
+        VMSTATE_UINT32(dc_panel_control, SM501State),
+        VMSTATE_UINT32(dc_panel_panning_control, SM501State),
+        VMSTATE_UINT32(dc_panel_fb_addr, SM501State),
+        VMSTATE_UINT32(dc_panel_fb_offset, SM501State),
+        VMSTATE_UINT32(dc_panel_fb_width, SM501State),
+        VMSTATE_UINT32(dc_panel_fb_height, SM501State),
+        VMSTATE_UINT32(dc_panel_tl_location, SM501State),
+        VMSTATE_UINT32(dc_panel_br_location, SM501State),
+        VMSTATE_UINT32(dc_panel_h_total, SM501State),
+        VMSTATE_UINT32(dc_panel_h_sync, SM501State),
+        VMSTATE_UINT32(dc_panel_v_total, SM501State),
+        VMSTATE_UINT32(dc_panel_v_sync, SM501State),
+        VMSTATE_UINT32(dc_panel_hwc_addr, SM501State),
+        VMSTATE_UINT32(dc_panel_hwc_location, SM501State),
+        VMSTATE_UINT32(dc_panel_hwc_color_1_2, SM501State),
+        VMSTATE_UINT32(dc_panel_hwc_color_3, SM501State),
+        VMSTATE_UINT32(dc_video_control, SM501State),
+        VMSTATE_UINT32(dc_crt_control, SM501State),
+        VMSTATE_UINT32(dc_crt_fb_addr, SM501State),
+        VMSTATE_UINT32(dc_crt_fb_offset, SM501State),
+        VMSTATE_UINT32(dc_crt_h_total, SM501State),
+        VMSTATE_UINT32(dc_crt_h_sync, SM501State),
+        VMSTATE_UINT32(dc_crt_v_total, SM501State),
+        VMSTATE_UINT32(dc_crt_v_sync, SM501State),
+        VMSTATE_UINT32(dc_crt_hwc_addr, SM501State),
+        VMSTATE_UINT32(dc_crt_hwc_location, SM501State),
+        VMSTATE_UINT32(dc_crt_hwc_color_1_2, SM501State),
+        VMSTATE_UINT32(dc_crt_hwc_color_3, SM501State),
+        VMSTATE_UINT32(twoD_source, SM501State),
+        VMSTATE_UINT32(twoD_destination, SM501State),
+        VMSTATE_UINT32(twoD_dimension, SM501State),
+        VMSTATE_UINT32(twoD_control, SM501State),
+        VMSTATE_UINT32(twoD_pitch, SM501State),
+        VMSTATE_UINT32(twoD_foreground, SM501State),
+        VMSTATE_UINT32(twoD_background, SM501State),
+        VMSTATE_UINT32(twoD_stretch, SM501State),
+        VMSTATE_UINT32(twoD_color_compare, SM501State),
+        VMSTATE_UINT32(twoD_color_compare_mask, SM501State),
+        VMSTATE_UINT32(twoD_mask, SM501State),
+        VMSTATE_UINT32(twoD_clip_tl, SM501State),
+        VMSTATE_UINT32(twoD_clip_br, SM501State),
+        VMSTATE_UINT32(twoD_mono_pattern_low, SM501State),
+        VMSTATE_UINT32(twoD_mono_pattern_high, SM501State),
+        VMSTATE_UINT32(twoD_window_width, SM501State),
+        VMSTATE_UINT32(twoD_source_base, SM501State),
+        VMSTATE_UINT32(twoD_destination_base, SM501State),
+        VMSTATE_UINT32(twoD_alpha, SM501State),
+        VMSTATE_UINT32(twoD_wrap, SM501State),
+        VMSTATE_END_OF_LIST()
+     }
+};
+
+#define TYPE_SYSBUS_SM501 "sysbus-sm501"
+#define SYSBUS_SM501(obj) \
+    OBJECT_CHECK(SM501SysBusState, (obj), TYPE_SYSBUS_SM501)
+
+typedef struct {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+    SM501State state;
+    uint32_t vram_size;
+    uint32_t base;
+    void *chr_state;
+} SM501SysBusState;
+
+static void sm501_realize_sysbus(DeviceState *dev, Error **errp)
+{
+    SM501SysBusState *s = SYSBUS_SM501(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    DeviceState *usb_dev;
+
+    sm501_init(&s->state, dev, s->vram_size);
+    if (get_local_mem_size(&s->state) != s->vram_size) {
+        error_setg(errp, "Invalid VRAM size, nearest valid size is %" PRIu32,
+                   get_local_mem_size(&s->state));
+        return;
+    }
+    sysbus_init_mmio(sbd, &s->state.local_mem_region);
+    sysbus_init_mmio(sbd, &s->state.mmio_region);
 
     /* bridge to usb host emulation module */
-    dev = qdev_create(NULL, "sysbus-ohci");
-    qdev_prop_set_uint32(dev, "num-ports", 2);
-    qdev_prop_set_uint64(dev, "dma-offset", base);
-    qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0,
-                    base + MMIO_BASE_OFFSET + SM501_USB_HOST);
-    sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq);
+    usb_dev = qdev_create(NULL, "sysbus-ohci");
+    qdev_prop_set_uint32(usb_dev, "num-ports", 2);
+    qdev_prop_set_uint64(usb_dev, "dma-offset", s->base);
+    qdev_init_nofail(usb_dev);
+    memory_region_add_subregion(&s->state.mmio_region, SM501_USB_HOST,
+                       sysbus_mmio_get_region(SYS_BUS_DEVICE(usb_dev), 0));
+    sysbus_pass_irq(sbd, SYS_BUS_DEVICE(usb_dev));
 
     /* bridge to serial emulation module */
-    if (chr) {
-        serial_mm_init(address_space_mem,
-                       base + MMIO_BASE_OFFSET + SM501_UART0, 2,
+    if (s->chr_state) {
+        serial_mm_init(&s->state.mmio_region, SM501_UART0, 2,
                        NULL, /* TODO : chain irq to IRL */
-                       115200, chr, DEVICE_NATIVE_ENDIAN);
+                       115200, s->chr_state, DEVICE_LITTLE_ENDIAN);
     }
+}
 
-    /* create qemu graphic console */
-    s->con = graphic_console_init(DEVICE(dev), 0, &sm501_ops, s);
+static Property sm501_sysbus_properties[] = {
+    DEFINE_PROP_UINT32("vram-size", SM501SysBusState, vram_size, 0),
+    DEFINE_PROP_UINT32("base", SM501SysBusState, base, 0),
+    DEFINE_PROP_PTR("chr-state", SM501SysBusState, chr_state),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void sm501_reset_sysbus(DeviceState *dev)
+{
+    SM501SysBusState *s = SYSBUS_SM501(dev);
+    sm501_reset(&s->state);
 }
+
+static const VMStateDescription vmstate_sm501_sysbus = {
+    .name = TYPE_SYSBUS_SM501,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT(state, SM501SysBusState, 1,
+                       vmstate_sm501_state, SM501State),
+        VMSTATE_END_OF_LIST()
+     }
+};
+
+static void sm501_sysbus_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = sm501_realize_sysbus;
+    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+    dc->desc = "SM501 Multimedia Companion";
+    dc->props = sm501_sysbus_properties;
+    dc->reset = sm501_reset_sysbus;
+    dc->vmsd = &vmstate_sm501_sysbus;
+    /* Note: pointer property "chr-state" may remain null, thus
+     * no need for dc->cannot_instantiate_with_device_add_yet = true;
+     */
+}
+
+static const TypeInfo sm501_sysbus_info = {
+    .name          = TYPE_SYSBUS_SM501,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(SM501SysBusState),
+    .class_init    = sm501_sysbus_class_init,
+};
+
+#define TYPE_PCI_SM501 "sm501"
+#define PCI_SM501(obj) OBJECT_CHECK(SM501PCIState, (obj), TYPE_PCI_SM501)
+
+typedef struct {
+    /*< private >*/
+    PCIDevice parent_obj;
+    /*< public >*/
+    SM501State state;
+    uint32_t vram_size;
+} SM501PCIState;
+
+static void sm501_realize_pci(PCIDevice *dev, Error **errp)
+{
+    SM501PCIState *s = PCI_SM501(dev);
+
+    sm501_init(&s->state, DEVICE(dev), s->vram_size);
+    if (get_local_mem_size(&s->state) != s->vram_size) {
+        error_setg(errp, "Invalid VRAM size, nearest valid size is %" PRIu32,
+                   get_local_mem_size(&s->state));
+        return;
+    }
+    pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     &s->state.local_mem_region);
+    pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     &s->state.mmio_region);
+}
+
+static Property sm501_pci_properties[] = {
+    DEFINE_PROP_UINT32("vram-size", SM501PCIState, vram_size, 64 * M_BYTE),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void sm501_reset_pci(DeviceState *dev)
+{
+    SM501PCIState *s = PCI_SM501(dev);
+    sm501_reset(&s->state);
+    /* Bits 2:0 of misc_control register is 001 for PCI */
+    s->state.misc_control |= 1;
+}
+
+static const VMStateDescription vmstate_sm501_pci = {
+    .name = TYPE_PCI_SM501,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(parent_obj, SM501PCIState),
+        VMSTATE_STRUCT(state, SM501PCIState, 1,
+                       vmstate_sm501_state, SM501State),
+        VMSTATE_END_OF_LIST()
+     }
+};
+
+static void sm501_pci_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->realize = sm501_realize_pci;
+    k->vendor_id = PCI_VENDOR_ID_SILICON_MOTION;
+    k->device_id = PCI_DEVICE_ID_SM501;
+    k->class_id = PCI_CLASS_DISPLAY_OTHER;
+    set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories);
+    dc->desc = "SM501 Display Controller";
+    dc->props = sm501_pci_properties;
+    dc->reset = sm501_reset_pci;
+    dc->hotpluggable = false;
+    dc->vmsd = &vmstate_sm501_pci;
+}
+
+static const TypeInfo sm501_pci_info = {
+    .name          = TYPE_PCI_SM501,
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(SM501PCIState),
+    .class_init    = sm501_pci_class_init,
+};
+
+static void sm501_register_types(void)
+{
+    type_register_static(&sm501_sysbus_info);
+    type_register_static(&sm501_pci_info);
+}
+
+type_init(sm501_register_types)
diff --git a/hw/display/sm501_template.h b/hw/display/sm501_template.h
index f33e499be4..a60abad019 100644
--- a/hw/display/sm501_template.h
+++ b/hw/display/sm501_template.h
@@ -47,81 +47,67 @@ static void glue(draw_line8_, PIXEL_NAME)(
 {
     uint8_t v, r, g, b;
     do {
-	v = ldub_p(s);
-	r = (pal[v] >> 16) & 0xff;
-	g = (pal[v] >>  8) & 0xff;
-	b = (pal[v] >>  0) & 0xff;
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s ++;
-	d += BPP;
-    } while (-- width != 0);
+        v = ldub_p(s);
+        r = (pal[v] >> 16) & 0xff;
+        g = (pal[v] >>  8) & 0xff;
+        b = (pal[v] >>  0) & 0xff;
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s++;
+        d += BPP;
+    } while (--width != 0);
 }
 
 static void glue(draw_line16_, PIXEL_NAME)(
-		 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
+                 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
 {
     uint16_t rgb565;
     uint8_t r, g, b;
 
     do {
-	rgb565 = lduw_p(s);
-	r = ((rgb565 >> 11) & 0x1f) << 3;
-	g = ((rgb565 >>  5) & 0x3f) << 2;
-	b = ((rgb565 >>  0) & 0x1f) << 3;
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s += 2;
-	d += BPP;
-    } while (-- width != 0);
+        rgb565 = lduw_le_p(s);
+        r = (rgb565 >> 8) & 0xf8;
+        g = (rgb565 >> 3) & 0xfc;
+        b = (rgb565 << 3) & 0xf8;
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s += 2;
+        d += BPP;
+    } while (--width != 0);
 }
 
 static void glue(draw_line32_, PIXEL_NAME)(
-		 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
+                 uint8_t *d, const uint8_t *s, int width, const uint32_t *pal)
 {
     uint8_t r, g, b;
 
     do {
-	ldub_p(s);
-#if defined(TARGET_WORDS_BIGENDIAN)
-        r = s[1];
-        g = s[2];
-        b = s[3];
-#else
-        b = s[0];
-        g = s[1];
         r = s[2];
-#endif
-	((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
-	s += 4;
-	d += BPP;
-    } while (-- width != 0);
+        g = s[1];
+        b = s[0];
+        *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+        s += 4;
+        d += BPP;
+    } while (--width != 0);
 }
 
 /**
  * Draw hardware cursor image on the given line.
  */
-static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
-                         uint8_t * palette, int c_y, uint8_t *d, int width)
+static void glue(draw_hwc_line_, PIXEL_NAME)(uint8_t *d, const uint8_t *s,
+                 int width, const uint8_t *palette, int c_x, int c_y)
 {
-    int x, i;
-    uint8_t bitset = 0;
-
-    /* get hardware cursor pattern */
-    uint32_t cursor_addr = get_hwc_address(s, crt);
-    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
-    cursor_addr += 64 * c_y / 4;  /* 4 pixels per byte */
-    cursor_addr += s->base;
+    int i;
+    uint8_t r, g, b, v, bitset = 0;
 
     /* get cursor position */
-    x = get_hwc_x(s, crt);
-    d += x * BPP;
-
-    for (i = 0; i < SM501_HWC_WIDTH && x + i < width; i++) {
-        uint8_t v;
+    assert(0 <= c_y && c_y < SM501_HWC_HEIGHT);
+    s += SM501_HWC_WIDTH * c_y / 4;  /* 4 pixels per byte */
+    d += c_x * BPP;
 
+    for (i = 0; i < SM501_HWC_WIDTH && c_x + i < width; i++) {
         /* get pixel value */
         if (i % 4 == 0) {
-            bitset = ldub_phys(&address_space_memory, cursor_addr);
-            cursor_addr++;
+            bitset = ldub_p(s);
+            s++;
         }
         v = bitset & 3;
         bitset >>= 2;
@@ -129,10 +115,10 @@ static void glue(draw_hwc_line_, PIXEL_NAME)(SM501State * s, int crt,
         /* write pixel */
         if (v) {
             v--;
-            uint8_t r = palette[v * 3 + 0];
-            uint8_t g = palette[v * 3 + 1];
-            uint8_t b = palette[v * 3 + 2];
-            ((PIXEL_TYPE *) d)[0] = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
+            r = palette[v * 3 + 0];
+            g = palette[v * 3 + 1];
+            b = palette[v * 3 + 2];
+            *(PIXEL_TYPE *)d = glue(rgb_to_pixel, PIXEL_NAME)(r, g, b);
         }
         d += BPP;
     }
diff --git a/hw/display/tcx.c b/hw/display/tcx.c
index 8e26aae801..5a1115cc65 100644
--- a/hw/display/tcx.c
+++ b/hw/display/tcx.c
@@ -25,7 +25,6 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
-#include "cpu.h" /* FIXME shouldn't use TARGET_PAGE_SIZE */
 #include "ui/console.h"
 #include "ui/pixel_ops.h"
 #include "hw/loader.h"
@@ -93,41 +92,46 @@ typedef struct TCXState {
     uint16_t cursy;
 } TCXState;
 
-static void tcx_set_dirty(TCXState *s)
+static void tcx_set_dirty(TCXState *s, ram_addr_t addr, int len)
 {
-    memory_region_set_dirty(&s->vram_mem, 0, MAXX * MAXY);
+    memory_region_set_dirty(&s->vram_mem, addr, len);
+
+    if (s->depth == 24) {
+        memory_region_set_dirty(&s->vram_mem, s->vram24_offset + addr * 4,
+                                len * 4);
+        memory_region_set_dirty(&s->vram_mem, s->cplane_offset + addr * 4,
+                                len * 4);
+    }
 }
 
-static inline int tcx24_check_dirty(TCXState *s, ram_addr_t page,
-                                    ram_addr_t page24, ram_addr_t cpage)
+static int tcx_check_dirty(TCXState *s, ram_addr_t addr, int len)
 {
     int ret;
 
-    ret = memory_region_get_dirty(&s->vram_mem, page, TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, page24, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
-    ret |= memory_region_get_dirty(&s->vram_mem, cpage, TARGET_PAGE_SIZE * 4,
-                                   DIRTY_MEMORY_VGA);
+    ret = memory_region_get_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
+
+    if (s->depth == 24) {
+        ret |= memory_region_get_dirty(&s->vram_mem,
+                                       s->vram24_offset + addr * 4, len * 4,
+                                       DIRTY_MEMORY_VGA);
+        ret |= memory_region_get_dirty(&s->vram_mem,
+                                       s->cplane_offset + addr * 4, len * 4,
+                                       DIRTY_MEMORY_VGA);
+    }
+
     return ret;
 }
 
-static inline void tcx24_reset_dirty(TCXState *ts, ram_addr_t page_min,
-                               ram_addr_t page_max, ram_addr_t page24,
-                              ram_addr_t cpage)
+static void tcx_reset_dirty(TCXState *s, ram_addr_t addr, int len)
 {
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page_min,
-                              (page_max - page_min) + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              page24 + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
-    memory_region_reset_dirty(&ts->vram_mem,
-                              cpage + page_min * 4,
-                              (page_max - page_min) * 4 + TARGET_PAGE_SIZE,
-                              DIRTY_MEMORY_VGA);
+    memory_region_reset_dirty(&s->vram_mem, addr, len, DIRTY_MEMORY_VGA);
+
+    if (s->depth == 24) {
+        memory_region_reset_dirty(&s->vram_mem, s->vram24_offset + addr * 4,
+                                  len * 4, DIRTY_MEMORY_VGA);
+        memory_region_reset_dirty(&s->vram_mem, s->cplane_offset + addr * 4,
+                                  len * 4, DIRTY_MEMORY_VGA);
+    }
 }
 
 static void update_palette_entries(TCXState *s, int start, int end)
@@ -136,27 +140,14 @@ static void update_palette_entries(TCXState *s, int start, int end)
     int i;
 
     for (i = start; i < end; i++) {
-        switch (surface_bits_per_pixel(surface)) {
-        default:
-        case 8:
-            s->palette[i] = rgb_to_pixel8(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 15:
-            s->palette[i] = rgb_to_pixel15(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 16:
-            s->palette[i] = rgb_to_pixel16(s->r[i], s->g[i], s->b[i]);
-            break;
-        case 32:
-            if (is_surface_bgr(surface)) {
-                s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]);
-            } else {
-                s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]);
-            }
-            break;
+        if (is_surface_bgr(surface)) {
+            s->palette[i] = rgb_to_pixel32bgr(s->r[i], s->g[i], s->b[i]);
+        } else {
+            s->palette[i] = rgb_to_pixel32(s->r[i], s->g[i], s->b[i]);
         }
+        break;
     }
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
 }
 
 static void tcx_draw_line32(TCXState *s1, uint8_t *d,
@@ -172,31 +163,6 @@ static void tcx_draw_line32(TCXState *s1, uint8_t *d,
     }
 }
 
-static void tcx_draw_line16(TCXState *s1, uint8_t *d,
-                            const uint8_t *s, int width)
-{
-    int x;
-    uint8_t val;
-    uint16_t *p = (uint16_t *)d;
-
-    for (x = 0; x < width; x++) {
-        val = *s++;
-        *p++ = s1->palette[val];
-    }
-}
-
-static void tcx_draw_line8(TCXState *s1, uint8_t *d,
-                           const uint8_t *s, int width)
-{
-    int x;
-    uint8_t val;
-
-    for(x = 0; x < width; x++) {
-        val = *s++;
-        *d++ = s1->palette[val];
-    }
-}
-
 static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
                               int y, int width)
 {
@@ -223,57 +189,6 @@ static void tcx_draw_cursor32(TCXState *s1, uint8_t *d,
     }
 }
 
-static void tcx_draw_cursor16(TCXState *s1, uint8_t *d,
-                              int y, int width)
-{
-    int x, len;
-    uint32_t mask, bits;
-    uint16_t *p = (uint16_t *)d;
-
-    y = y - s1->cursy;
-    mask = s1->cursmask[y];
-    bits = s1->cursbits[y];
-    len = MIN(width - s1->cursx, 32);
-    p = &p[s1->cursx];
-    for (x = 0; x < len; x++) {
-        if (mask & 0x80000000) {
-            if (bits & 0x80000000) {
-                *p = s1->palette[259];
-            } else {
-                *p = s1->palette[258];
-            }
-        }
-        p++;
-        mask <<= 1;
-        bits <<= 1;
-    }
-}
-
-static void tcx_draw_cursor8(TCXState *s1, uint8_t *d,
-                              int y, int width)
-{
-    int x, len;
-    uint32_t mask, bits;
-
-    y = y - s1->cursy;
-    mask = s1->cursmask[y];
-    bits = s1->cursbits[y];
-    len = MIN(width - s1->cursx, 32);
-    d = &d[s1->cursx];
-    for (x = 0; x < len; x++) {
-        if (mask & 0x80000000) {
-            if (bits & 0x80000000) {
-                *d = s1->palette[259];
-            } else {
-                *d = s1->palette[258];
-            }
-        }
-        d++;
-        mask <<= 1;
-        bits <<= 1;
-    }
-}
-
 /*
   XXX Could be much more optimal:
   * detect if line/page/whole screen is in 24 bit mode
@@ -322,10 +237,8 @@ static void tcx_update_display(void *opaque)
     ram_addr_t page, page_min, page_max;
     int y, y_start, dd, ds;
     uint8_t *d, *s;
-    void (*f)(TCXState *s1, uint8_t *dst, const uint8_t *src, int width);
-    void (*fc)(TCXState *s1, uint8_t *dst, int y, int width);
 
-    if (surface_bits_per_pixel(surface) == 0) {
+    if (surface_bits_per_pixel(surface) != 32) {
         return;
     }
 
@@ -338,29 +251,9 @@ static void tcx_update_display(void *opaque)
     dd = surface_stride(surface);
     ds = 1024;
 
-    switch (surface_bits_per_pixel(surface)) {
-    case 32:
-        f = tcx_draw_line32;
-        fc = tcx_draw_cursor32;
-        break;
-    case 15:
-    case 16:
-        f = tcx_draw_line16;
-        fc = tcx_draw_cursor16;
-        break;
-    default:
-    case 8:
-        f = tcx_draw_line8;
-        fc = tcx_draw_cursor8;
-        break;
-    case 0:
-        return;
-    }
-
     memory_region_sync_dirty_bitmap(&ts->vram_mem);
-    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
-        if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
-                                    DIRTY_MEMORY_VGA)) {
+    for (y = 0; y < ts->height; y++, page += ds) {
+        if (tcx_check_dirty(ts, page, ds)) {
             if (y_start < 0)
                 y_start = y;
             if (page < page_min)
@@ -368,37 +261,10 @@ static void tcx_update_display(void *opaque)
             if (page > page_max)
                 page_max = page;
 
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
-            if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            y++;
-
-            f(ts, d, s, ts->width);
+            tcx_draw_line32(ts, d, s, ts->width);
             if (y >= ts->cursy && y < ts->cursy + 32 && ts->cursx < ts->width) {
-                fc(ts, d, y, ts->width);
+                tcx_draw_cursor32(ts, d, y, ts->width);
             }
-            d += dd;
-            s += ds;
-            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -406,10 +272,9 @@ static void tcx_update_display(void *opaque)
                                ts->width, y - y_start);
                 y_start = -1;
             }
-            d += dd * 4;
-            s += ds * 4;
-            y += 4;
         }
+        s += ds;
+        d += dd;
     }
     if (y_start >= 0) {
         /* flush to display */
@@ -418,10 +283,7 @@ static void tcx_update_display(void *opaque)
     }
     /* reset modified pages */
     if (page_max >= page_min) {
-        memory_region_reset_dirty(&ts->vram_mem,
-                                  page_min,
-                                  (page_max - page_min) + TARGET_PAGE_SIZE,
-                                  DIRTY_MEMORY_VGA);
+        tcx_reset_dirty(ts, page_min, page_max - page_min);
     }
 }
 
@@ -429,7 +291,7 @@ static void tcx24_update_display(void *opaque)
 {
     TCXState *ts = opaque;
     DisplaySurface *surface = qemu_console_surface(ts->con);
-    ram_addr_t page, page_min, page_max, cpage, page24;
+    ram_addr_t page, page_min, page_max;
     int y, y_start, dd, ds;
     uint8_t *d, *s;
     uint32_t *cptr, *s24;
@@ -439,8 +301,6 @@ static void tcx24_update_display(void *opaque)
     }
 
     page = 0;
-    page24 = ts->vram24_offset;
-    cpage = ts->cplane_offset;
     y_start = -1;
     page_min = -1;
     page_max = 0;
@@ -452,9 +312,8 @@ static void tcx24_update_display(void *opaque)
     ds = 1024;
 
     memory_region_sync_dirty_bitmap(&ts->vram_mem);
-    for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
-            page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
-        if (tcx24_check_dirty(ts, page, page24, cpage)) {
+    for (y = 0; y < ts->height; y++, page += ds) {
+        if (tcx_check_dirty(ts, page, ds)) {
             if (y_start < 0)
                 y_start = y;
             if (page < page_min)
@@ -465,38 +324,6 @@ static void tcx24_update_display(void *opaque)
             if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
                 tcx_draw_cursor32(ts, d, y, ts->width);
             }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
-            tcx24_draw_line32(ts, d, s, ts->width, cptr, s24);
-            if (y >= ts->cursy && y < ts->cursy+32 && ts->cursx < ts->width) {
-                tcx_draw_cursor32(ts, d, y, ts->width);
-            }
-            d += dd;
-            s += ds;
-            cptr += ds;
-            s24 += ds;
-            y++;
         } else {
             if (y_start >= 0) {
                 /* flush to display */
@@ -504,12 +331,11 @@ static void tcx24_update_display(void *opaque)
                                ts->width, y - y_start);
                 y_start = -1;
             }
-            d += dd * 4;
-            s += ds * 4;
-            cptr += ds * 4;
-            s24 += ds * 4;
-            y += 4;
         }
+        d += dd;
+        s += ds;
+        cptr += ds;
+        s24 += ds;
     }
     if (y_start >= 0) {
         /* flush to display */
@@ -518,7 +344,7 @@ static void tcx24_update_display(void *opaque)
     }
     /* reset modified pages */
     if (page_max >= page_min) {
-        tcx24_reset_dirty(ts, page_min, page_max, page24, cpage);
+        tcx_reset_dirty(ts, page_min, page_max - page_min);
     }
 }
 
@@ -526,7 +352,7 @@ static void tcx_invalidate_display(void *opaque)
 {
     TCXState *s = opaque;
 
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
     qemu_console_resize(s->con, s->width, s->height);
 }
 
@@ -534,7 +360,7 @@ static void tcx24_invalidate_display(void *opaque)
 {
     TCXState *s = opaque;
 
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
     qemu_console_resize(s->con, s->width, s->height);
 }
 
@@ -543,7 +369,7 @@ static int vmstate_tcx_post_load(void *opaque, int version_id)
     TCXState *s = opaque;
 
     update_palette_entries(s, 0, 256);
-    tcx_set_dirty(s);
+    tcx_set_dirty(s, 0, memory_region_size(&s->vram_mem));
     return 0;
 }
 
@@ -699,7 +525,7 @@ static void tcx_stip_writel(void *opaque, hwaddr addr,
                 val <<= 1;
             }
         }
-        memory_region_set_dirty(&s->vram_mem, addr, 32);
+        tcx_set_dirty(s, addr, 32);
     }
 }
 
@@ -732,7 +558,7 @@ static void tcx_rstip_writel(void *opaque, hwaddr addr,
                 val <<= 1;
             }
         }
-        memory_region_set_dirty(&s->vram_mem, addr, 32);
+        tcx_set_dirty(s, addr, 32);
     }
 }
 
@@ -790,7 +616,7 @@ static void tcx_blit_writel(void *opaque, hwaddr addr,
                 memcpy(&s->vram24[addr], &s->vram24[adsr], len * 4);
             }
         }
-        memory_region_set_dirty(&s->vram_mem, addr, len);
+        tcx_set_dirty(s, addr, len);
     }
 }
 
@@ -824,7 +650,7 @@ static void tcx_rblit_writel(void *opaque, hwaddr addr,
                 memcpy(&s->cplane[addr], &s->cplane[adsr], len * 4);
             }
         }
-        memory_region_set_dirty(&s->vram_mem, addr, len);
+        tcx_set_dirty(s, addr, len);
     }
 }
 
@@ -861,7 +687,7 @@ static void tcx_invalidate_cursor_position(TCXState *s)
     start = ymin * 1024;
     end   = ymax * 1024;
 
-    memory_region_set_dirty(&s->vram_mem, start, end-start);
+    tcx_set_dirty(s, start, end - start);
 }
 
 static uint64_t tcx_thc_readl(void *opaque, hwaddr addr,
@@ -1017,8 +843,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
     vmstate_register_ram_global(&s->rom);
     fcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, TCX_ROM_FILE);
     if (fcode_filename) {
-        ret = load_image_targphys(fcode_filename, s->prom_addr,
-                                  FCODE_MAX_ROM_SIZE);
+        ret = load_image_mr(fcode_filename, &s->rom);
         g_free(fcode_filename);
         if (ret < 0 || ret > FCODE_MAX_ROM_SIZE) {
             error_report("tcx: could not load prom '%s'", TCX_ROM_FILE);
@@ -1076,7 +901,6 @@ static Property tcx_properties[] = {
     DEFINE_PROP_UINT16("width",    TCXState, width,     -1),
     DEFINE_PROP_UINT16("height",   TCXState, height,    -1),
     DEFINE_PROP_UINT16("depth",    TCXState, depth,     -1),
-    DEFINE_PROP_UINT64("prom_addr", TCXState, prom_addr, -1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 69c3e1d674..b2516c8d21 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -1434,6 +1434,14 @@ void vga_invalidate_scanlines(VGACommonState *s, int y1, int y2)
     }
 }
 
+static bool vga_scanline_invalidated(VGACommonState *s, int y)
+{
+    if (y >= VGA_MAX_HEIGHT) {
+        return false;
+    }
+    return s->invalidated_y_table[y >> 5] & (1 << (y & 0x1f));
+}
+
 void vga_sync_dirty_bitmap(VGACommonState *s)
 {
     memory_region_sync_dirty_bitmap(&s->vram);
@@ -1457,7 +1465,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     DisplaySurface *surface = qemu_console_surface(s->con);
     int y1, y, update, linesize, y_start, double_scan, mask, depth;
     int width, height, shift_control, line_offset, bwidth, bits;
-    ram_addr_t page0, page1, page_min, page_max;
+    ram_addr_t page0, page1;
+    DirtyBitmapSnapshot *snap = NULL;
     int disp_width, multi_scan, multi_run;
     uint8_t *d;
     uint32_t v, addr1, addr;
@@ -1472,9 +1481,6 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
 
     full_update |= update_basic_params(s);
 
-    if (!full_update)
-        vga_sync_dirty_bitmap(s);
-
     s->get_resolution(s, &width, &height);
     disp_width = width;
 
@@ -1617,11 +1623,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
     addr1 = (s->start_addr * 4);
     bwidth = (width * bits + 7) / 8;
     y_start = -1;
-    page_min = -1;
-    page_max = 0;
     d = surface_data(surface);
     linesize = surface_stride(surface);
     y1 = 0;
+
+    if (!full_update) {
+        vga_sync_dirty_bitmap(s);
+        snap = memory_region_snapshot_and_clear_dirty(&s->vram, addr1,
+                                                      bwidth * height,
+                                                      DIRTY_MEMORY_VGA);
+    }
+
     for(y = 0; y < height; y++) {
         addr = addr1;
         if (!(s->cr[VGA_CRTC_MODE] & 1)) {
@@ -1636,17 +1648,17 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         update = full_update;
         page0 = addr;
         page1 = addr + bwidth - 1;
-        update |= memory_region_get_dirty(&s->vram, page0, page1 - page0,
-                                          DIRTY_MEMORY_VGA);
-        /* explicit invalidation for the hardware cursor */
-        update |= (s->invalidated_y_table[y >> 5] >> (y & 0x1f)) & 1;
+        if (full_update) {
+            update = 1;
+        } else {
+            update = memory_region_snapshot_get_dirty(&s->vram, snap,
+                                                      page0, page1 - page0);
+        }
+        /* explicit invalidation for the hardware cursor (cirrus only) */
+        update |= vga_scanline_invalidated(s, y);
         if (update) {
             if (y_start < 0)
                 y_start = y;
-            if (page0 < page_min)
-                page_min = page0;
-            if (page1 > page_max)
-                page_max = page1;
             if (!(is_buffer_shared(surface))) {
                 vga_draw_line(s, d, s->vram_ptr + addr, width);
                 if (s->cursor_draw_line)
@@ -1679,14 +1691,8 @@ static void vga_draw_graphic(VGACommonState *s, int full_update)
         dpy_gfx_update(s->con, 0, y_start,
                        disp_width, y - y_start);
     }
-    /* reset modified pages */
-    if (page_max >= page_min) {
-        memory_region_reset_dirty(&s->vram,
-                                  page_min,
-                                  page_max - page_min,
-                                  DIRTY_MEMORY_VGA);
-    }
-    memset(s->invalidated_y_table, 0, ((height + 31) >> 5) * 4);
+    g_free(snap);
+    memset(s->invalidated_y_table, 0, sizeof(s->invalidated_y_table));
 }
 
 static void vga_draw_blank(VGACommonState *s, int full_update)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 9b530ab5b0..e1056f34df 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -258,41 +258,22 @@ void virtio_gpu_get_display_info(VirtIOGPU *g,
 static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format)
 {
     switch (virtio_gpu_format) {
-#ifdef HOST_WORDS_BIGENDIAN
     case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_b8g8r8x8;
+        return PIXMAN_BE_b8g8r8x8;
     case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_b8g8r8a8;
+        return PIXMAN_BE_b8g8r8a8;
     case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_x8r8g8b8;
+        return PIXMAN_BE_x8r8g8b8;
     case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_a8r8g8b8;
+        return PIXMAN_BE_a8r8g8b8;
     case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_r8g8b8x8;
+        return PIXMAN_BE_r8g8b8x8;
     case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_r8g8b8a8;
+        return PIXMAN_BE_r8g8b8a8;
     case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_x8b8g8r8;
+        return PIXMAN_BE_x8b8g8r8;
     case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_a8b8g8r8;
-#else
-    case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
-        return PIXMAN_x8r8g8b8;
-    case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
-        return PIXMAN_a8r8g8b8;
-    case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
-        return PIXMAN_b8g8r8x8;
-    case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
-        return PIXMAN_b8g8r8a8;
-    case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
-        return PIXMAN_x8b8g8r8;
-    case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
-        return PIXMAN_a8b8g8r8;
-    case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
-        return PIXMAN_r8g8b8x8;
-    case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
-        return PIXMAN_r8g8b8a8;
-#endif
+        return PIXMAN_BE_a8b8g8r8;
     default:
         return 0;
     }
@@ -1170,8 +1151,8 @@ static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
     virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
                 g->config_size);
 
-    g->req_state[0].width = 1024;
-    g->req_state[0].height = 768;
+    g->req_state[0].width = g->conf.xres;
+    g->req_state[0].height = g->conf.yres;
 
     if (virtio_gpu_virgl_enabled(g->conf)) {
         /* use larger control queue in 3d mode */
@@ -1291,6 +1272,8 @@ static Property virtio_gpu_properties[] = {
     DEFINE_PROP_BIT("stats", VirtIOGPU, conf.flags,
                     VIRTIO_GPU_FLAG_STATS_ENABLED, false),
 #endif
+    DEFINE_PROP_UINT32("xres", VirtIOGPU, conf.xres, 1024),
+    DEFINE_PROP_UINT32("yres", VirtIOGPU, conf.yres, 768),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index 6599cf078d..ec5f27d67e 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -1118,9 +1118,9 @@ static void vmsvga_update_display(void *opaque)
 {
     struct vmsvga_state_s *s = opaque;
     DisplaySurface *surface;
-    bool dirty = false;
 
-    if (!s->enable) {
+    if (!s->enable || !s->config) {
+        /* in standard vga mode */
         s->vga.hw_ops->gfx_update(&s->vga);
         return;
     }
@@ -1131,26 +1131,11 @@ static void vmsvga_update_display(void *opaque)
     vmsvga_fifo_run(s);
     vmsvga_update_rect_flush(s);
 
-    /*
-     * Is it more efficient to look at vram VGA-dirty bits or wait
-     * for the driver to issue SVGA_CMD_UPDATE?
-     */
-    if (memory_region_is_logging(&s->vga.vram, DIRTY_MEMORY_VGA)) {
-        vga_sync_dirty_bitmap(&s->vga);
-        dirty = memory_region_get_dirty(&s->vga.vram, 0,
-            surface_stride(surface) * surface_height(surface),
-            DIRTY_MEMORY_VGA);
-    }
-    if (s->invalidated || dirty) {
+    if (s->invalidated) {
         s->invalidated = 0;
         dpy_gfx_update(s->vga.con, 0, 0,
                    surface_width(surface), surface_height(surface));
     }
-    if (dirty) {
-        memory_region_reset_dirty(&s->vga.vram, 0,
-            surface_stride(surface) * surface_height(surface),
-            DIRTY_MEMORY_VGA);
-    }
 }
 
 static void vmsvga_reset(DeviceState *dev)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index e0732ccaf1..f86a40aa30 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -572,8 +572,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
 
     uint64_t val = -1;
     if (addr + size > AMDVI_MMIO_SIZE) {
-        trace_amdvi_mmio_read("error: addr outside region: max ",
-                (uint64_t)AMDVI_MMIO_SIZE, addr, size);
+        trace_amdvi_mmio_read_invalid(AMDVI_MMIO_SIZE, addr, size);
         return (uint64_t)-1;
     }
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 22d8226e43..02f047c8e3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -595,6 +595,22 @@ static inline uint32_t vtd_get_agaw_from_context_entry(VTDContextEntry *ce)
     return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
 }
 
+static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
+{
+    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
+    return 1ULL << MIN(ce_agaw, VTD_MGAW);
+}
+
+/* Return true if IOVA passes range check, otherwise false. */
+static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
+{
+    /*
+     * Check if @iova is above 2^X-1, where X is the minimum of MGAW
+     * in CAP_REG and AW in context-entry.
+     */
+    return !(iova & ~(vtd_iova_limit(ce) - 1));
+}
+
 static const uint64_t vtd_paging_entry_rsvd_field[] = {
     [0] = ~0ULL,
     /* For not large page */
@@ -630,13 +646,9 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
     uint32_t level = vtd_get_level_from_context_entry(ce);
     uint32_t offset;
     uint64_t slpte;
-    uint32_t ce_agaw = vtd_get_agaw_from_context_entry(ce);
     uint64_t access_right_check;
 
-    /* Check if @iova is above 2^X-1, where X is the minimum of MGAW
-     * in CAP_REG and AW in context-entry.
-     */
-    if (iova & ~((1ULL << MIN(ce_agaw, VTD_MGAW)) - 1)) {
+    if (!vtd_iova_range_check(iova, ce)) {
         VTD_DPRINTF(GENERAL, "error: iova 0x%"PRIx64 " exceeds limits", iova);
         return -VTD_FR_ADDR_BEYOND_MGAW;
     }
@@ -684,6 +696,135 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
     }
 }
 
+typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
+
+/**
+ * vtd_page_walk_level - walk over specific level for IOVA range
+ *
+ * @addr: base GPA addr to start the walk
+ * @start: IOVA range start address
+ * @end: IOVA range end address (start <= addr < end)
+ * @hook_fn: hook func to be called when detected page
+ * @private: private data to be passed into hook func
+ * @read: whether parent level has read permission
+ * @write: whether parent level has write permission
+ * @notify_unmap: whether we should notify invalid entries
+ */
+static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
+                               uint64_t end, vtd_page_walk_hook hook_fn,
+                               void *private, uint32_t level,
+                               bool read, bool write, bool notify_unmap)
+{
+    bool read_cur, write_cur, entry_valid;
+    uint32_t offset;
+    uint64_t slpte;
+    uint64_t subpage_size, subpage_mask;
+    IOMMUTLBEntry entry;
+    uint64_t iova = start;
+    uint64_t iova_next;
+    int ret = 0;
+
+    trace_vtd_page_walk_level(addr, level, start, end);
+
+    subpage_size = 1ULL << vtd_slpt_level_shift(level);
+    subpage_mask = vtd_slpt_level_page_mask(level);
+
+    while (iova < end) {
+        iova_next = (iova & subpage_mask) + subpage_size;
+
+        offset = vtd_iova_level_offset(iova, level);
+        slpte = vtd_get_slpte(addr, offset);
+
+        if (slpte == (uint64_t)-1) {
+            trace_vtd_page_walk_skip_read(iova, iova_next);
+            goto next;
+        }
+
+        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
+            trace_vtd_page_walk_skip_reserve(iova, iova_next);
+            goto next;
+        }
+
+        /* Permissions are stacked with parents' */
+        read_cur = read && (slpte & VTD_SL_R);
+        write_cur = write && (slpte & VTD_SL_W);
+
+        /*
+         * As long as we have either read/write permission, this is a
+         * valid entry. The rule works for both page entries and page
+         * table entries.
+         */
+        entry_valid = read_cur | write_cur;
+
+        if (vtd_is_last_slpte(slpte, level)) {
+            entry.target_as = &address_space_memory;
+            entry.iova = iova & subpage_mask;
+            /* NOTE: this is only meaningful if entry_valid == true */
+            entry.translated_addr = vtd_get_slpte_addr(slpte);
+            entry.addr_mask = ~subpage_mask;
+            entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
+            if (!entry_valid && !notify_unmap) {
+                trace_vtd_page_walk_skip_perm(iova, iova_next);
+                goto next;
+            }
+            trace_vtd_page_walk_one(level, entry.iova, entry.translated_addr,
+                                    entry.addr_mask, entry.perm);
+            if (hook_fn) {
+                ret = hook_fn(&entry, private);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+        } else {
+            if (!entry_valid) {
+                trace_vtd_page_walk_skip_perm(iova, iova_next);
+                goto next;
+            }
+            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova,
+                                      MIN(iova_next, end), hook_fn, private,
+                                      level - 1, read_cur, write_cur,
+                                      notify_unmap);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+next:
+        iova = iova_next;
+    }
+
+    return 0;
+}
+
+/**
+ * vtd_page_walk - walk specific IOVA range, and call the hook
+ *
+ * @ce: context entry to walk upon
+ * @start: IOVA address to start the walk
+ * @end: IOVA range end address (start <= addr < end)
+ * @hook_fn: the hook that to be called for each detected area
+ * @private: private data for the hook function
+ */
+static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
+                         vtd_page_walk_hook hook_fn, void *private,
+                         bool notify_unmap)
+{
+    dma_addr_t addr = vtd_get_slpt_base_from_context(ce);
+    uint32_t level = vtd_get_level_from_context_entry(ce);
+
+    if (!vtd_iova_range_check(start, ce)) {
+        return -VTD_FR_ADDR_BEYOND_MGAW;
+    }
+
+    if (!vtd_iova_range_check(end, ce)) {
+        /* Fix end so that it reaches the maximum */
+        end = vtd_iova_limit(ce);
+    }
+
+    return vtd_page_walk_level(addr, start, end, hook_fn, private,
+                               level, true, true, notify_unmap);
+}
+
 /* Map a device to its corresponding domain (context-entry) */
 static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                                     uint8_t devfn, VTDContextEntry *ce)
@@ -898,6 +1039,15 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
                 s->intr_root, s->intr_size);
 }
 
+static void vtd_iommu_replay_all(IntelIOMMUState *s)
+{
+    IntelIOMMUNotifierNode *node;
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        memory_region_iommu_replay_all(&node->vtd_as->iommu);
+    }
+}
+
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
     trace_vtd_inv_desc_cc_global();
@@ -905,6 +1055,14 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
     if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
         vtd_reset_context_cache(s);
     }
+    /*
+     * From VT-d spec 6.5.2.1, a global context entry invalidation
+     * should be followed by a IOTLB global invalidation, so we should
+     * be safe even without this. Hoewever, let's replay the region as
+     * well to be safer, and go back here when we need finer tunes for
+     * VT-d emulation codes.
+     */
+    vtd_iommu_replay_all(s);
 }
 
 
@@ -971,6 +1129,16 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
                 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
                                              VTD_PCI_FUNC(devfn_it));
                 vtd_as->context_cache_entry.context_cache_gen = 0;
+                /*
+                 * So a device is moving out of (or moving into) a
+                 * domain, a replay() suites here to notify all the
+                 * IOMMU_NOTIFIER_MAP registers about this change.
+                 * This won't bring bad even if we have no such
+                 * notifier registered - the IOMMU notification
+                 * framework will skip MAP notifications if that
+                 * happened.
+                 */
+                memory_region_iommu_replay_all(&vtd_as->iommu);
             }
         }
     }
@@ -1012,12 +1180,53 @@ static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
 {
     trace_vtd_iotlb_reset("global invalidation recved");
     vtd_reset_iotlb(s);
+    vtd_iommu_replay_all(s);
 }
 
 static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
 {
+    IntelIOMMUNotifierNode *node;
+    VTDContextEntry ce;
+    VTDAddressSpace *vtd_as;
+
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
                                 &domain_id);
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        vtd_as = node->vtd_as;
+        if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                      vtd_as->devfn, &ce) &&
+            domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+            memory_region_iommu_replay_all(&vtd_as->iommu);
+        }
+    }
+}
+
+static int vtd_page_invalidate_notify_hook(IOMMUTLBEntry *entry,
+                                           void *private)
+{
+    memory_region_notify_iommu((MemoryRegion *)private, *entry);
+    return 0;
+}
+
+static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
+                                           uint16_t domain_id, hwaddr addr,
+                                           uint8_t am)
+{
+    IntelIOMMUNotifierNode *node;
+    VTDContextEntry ce;
+    int ret;
+
+    QLIST_FOREACH(node, &(s->notifiers_list), next) {
+        VTDAddressSpace *vtd_as = node->vtd_as;
+        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+                                       vtd_as->devfn, &ce);
+        if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+            vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE,
+                          vtd_page_invalidate_notify_hook,
+                          (void *)&vtd_as->iommu, true);
+        }
+    }
 }
 
 static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
@@ -1030,6 +1239,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
     info.addr = addr;
     info.mask = ~((1 << am) - 1);
     g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+    vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
 }
 
 /* Flush IOTLB
@@ -1151,9 +1361,49 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
     vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
 }
 
+static void vtd_switch_address_space(VTDAddressSpace *as)
+{
+    assert(as);
+
+    trace_vtd_switch_address_space(pci_bus_num(as->bus),
+                                   VTD_PCI_SLOT(as->devfn),
+                                   VTD_PCI_FUNC(as->devfn),
+                                   as->iommu_state->dmar_enabled);
+
+    /* Turn off first then on the other */
+    if (as->iommu_state->dmar_enabled) {
+        memory_region_set_enabled(&as->sys_alias, false);
+        memory_region_set_enabled(&as->iommu, true);
+    } else {
+        memory_region_set_enabled(&as->iommu, false);
+        memory_region_set_enabled(&as->sys_alias, true);
+    }
+}
+
+static void vtd_switch_address_space_all(IntelIOMMUState *s)
+{
+    GHashTableIter iter;
+    VTDBus *vtd_bus;
+    int i;
+
+    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
+        for (i = 0; i < X86_IOMMU_PCI_DEVFN_MAX; i++) {
+            if (!vtd_bus->dev_as[i]) {
+                continue;
+            }
+            vtd_switch_address_space(vtd_bus->dev_as[i]);
+        }
+    }
+}
+
 /* Handle Translation Enable/Disable */
 static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
 {
+    if (s->dmar_enabled == en) {
+        return;
+    }
+
     VTD_DPRINTF(CSR, "Translation Enable %s", (en ? "on" : "off"));
 
     if (en) {
@@ -1168,6 +1418,8 @@ static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
         /* Ok - report back to driver */
         vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
     }
+
+    vtd_switch_address_space_all(s);
 }
 
 /* Handle Interrupt Remap Enable/Disable */
@@ -1457,7 +1709,7 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
     entry.iova = addr;
     entry.perm = IOMMU_NONE;
     entry.translated_addr = 0;
-    memory_region_notify_iommu(entry.target_as->root, entry);
+    memory_region_notify_iommu(&vtd_dev_as->iommu, entry);
 
 done:
     return true;
@@ -2005,15 +2257,33 @@ static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu,
                                           IOMMUNotifierFlag new)
 {
     VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    IntelIOMMUNotifierNode *node = NULL;
+    IntelIOMMUNotifierNode *next_node = NULL;
 
-    if (new & IOMMU_NOTIFIER_MAP) {
-        error_report("Device at bus %s addr %02x.%d requires iommu "
-                     "notifier which is currently not supported by "
-                     "intel-iommu emulation",
-                     vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn),
-                     PCI_FUNC(vtd_as->devfn));
+    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
+        error_report("We need to set cache_mode=1 for intel-iommu to enable "
+                     "device assignment with IOMMU protection.");
         exit(1);
     }
+
+    if (old == IOMMU_NOTIFIER_NONE) {
+        node = g_malloc0(sizeof(*node));
+        node->vtd_as = vtd_as;
+        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
+        return;
+    }
+
+    /* update notifier node with new flags */
+    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
+        if (node->vtd_as == vtd_as) {
+            if (new == IOMMU_NOTIFIER_NONE) {
+                QLIST_REMOVE(node, next);
+                g_free(node);
+            }
+            return;
+        }
+    }
 }
 
 static const VMStateDescription vtd_vmstate = {
@@ -2389,19 +2659,150 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
         vtd_dev_as->devfn = (uint8_t)devfn;
         vtd_dev_as->iommu_state = s;
         vtd_dev_as->context_cache_entry.context_cache_gen = 0;
+
+        /*
+         * Memory region relationships looks like (Address range shows
+         * only lower 32 bits to make it short in length...):
+         *
+         * |-----------------+-------------------+----------|
+         * | Name            | Address range     | Priority |
+         * |-----------------+-------------------+----------+
+         * | vtd_root        | 00000000-ffffffff |        0 |
+         * |  intel_iommu    | 00000000-ffffffff |        1 |
+         * |  vtd_sys_alias  | 00000000-ffffffff |        1 |
+         * |  intel_iommu_ir | fee00000-feefffff |       64 |
+         * |-----------------+-------------------+----------|
+         *
+         * We enable/disable DMAR by switching enablement for
+         * vtd_sys_alias and intel_iommu regions. IR region is always
+         * enabled.
+         */
         memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
-                                 &s->iommu_ops, "intel_iommu", UINT64_MAX);
+                                 &s->iommu_ops, "intel_iommu_dmar",
+                                 UINT64_MAX);
+        memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
+                                 "vtd_sys_alias", get_system_memory(),
+                                 0, memory_region_size(get_system_memory()));
         memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
                               &vtd_mem_ir_ops, s, "intel_iommu_ir",
                               VTD_INTERRUPT_ADDR_SIZE);
-        memory_region_add_subregion(&vtd_dev_as->iommu, VTD_INTERRUPT_ADDR_FIRST,
-                                    &vtd_dev_as->iommu_ir);
-        address_space_init(&vtd_dev_as->as,
-                           &vtd_dev_as->iommu, name);
+        memory_region_init(&vtd_dev_as->root, OBJECT(s),
+                           "vtd_root", UINT64_MAX);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root,
+                                            VTD_INTERRUPT_ADDR_FIRST,
+                                            &vtd_dev_as->iommu_ir, 64);
+        address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
+                                            &vtd_dev_as->sys_alias, 1);
+        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
+                                            &vtd_dev_as->iommu, 1);
+        vtd_switch_address_space(vtd_dev_as);
     }
     return vtd_dev_as;
 }
 
+/* Unmap the whole range in the notifier's scope. */
+static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
+{
+    IOMMUTLBEntry entry;
+    hwaddr size;
+    hwaddr start = n->start;
+    hwaddr end = n->end;
+
+    /*
+     * Note: all the codes in this function has a assumption that IOVA
+     * bits are no more than VTD_MGAW bits (which is restricted by
+     * VT-d spec), otherwise we need to consider overflow of 64 bits.
+     */
+
+    if (end > VTD_ADDRESS_SIZE) {
+        /*
+         * Don't need to unmap regions that is bigger than the whole
+         * VT-d supported address space size
+         */
+        end = VTD_ADDRESS_SIZE;
+    }
+
+    assert(start <= end);
+    size = end - start;
+
+    if (ctpop64(size) != 1) {
+        /*
+         * This size cannot format a correct mask. Let's enlarge it to
+         * suite the minimum available mask.
+         */
+        int n = 64 - clz64(size);
+        if (n > VTD_MGAW) {
+            /* should not happen, but in case it happens, limit it */
+            n = VTD_MGAW;
+        }
+        size = 1ULL << n;
+    }
+
+    entry.target_as = &address_space_memory;
+    /* Adjust iova for the size */
+    entry.iova = n->start & ~(size - 1);
+    /* This field is meaningless for unmap */
+    entry.translated_addr = 0;
+    entry.perm = IOMMU_NONE;
+    entry.addr_mask = size - 1;
+
+    trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
+                             VTD_PCI_SLOT(as->devfn),
+                             VTD_PCI_FUNC(as->devfn),
+                             entry.iova, size);
+
+    memory_region_notify_one(n, &entry);
+}
+
+static void vtd_address_space_unmap_all(IntelIOMMUState *s)
+{
+    IntelIOMMUNotifierNode *node;
+    VTDAddressSpace *vtd_as;
+    IOMMUNotifier *n;
+
+    QLIST_FOREACH(node, &s->notifiers_list, next) {
+        vtd_as = node->vtd_as;
+        IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
+            vtd_address_space_unmap(vtd_as, n);
+        }
+    }
+}
+
+static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
+{
+    memory_region_notify_one((IOMMUNotifier *)private, entry);
+    return 0;
+}
+
+static void vtd_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
+{
+    VTDAddressSpace *vtd_as = container_of(mr, VTDAddressSpace, iommu);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint8_t bus_n = pci_bus_num(vtd_as->bus);
+    VTDContextEntry ce;
+
+    /*
+     * The replay can be triggered by either a invalidation or a newly
+     * created entry. No matter what, we release existing mappings
+     * (it means flushing caches for UNMAP-only registers).
+     */
+    vtd_address_space_unmap(vtd_as, n);
+
+    if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
+        trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
+                                  PCI_FUNC(vtd_as->devfn),
+                                  VTD_CONTEXT_ENTRY_DID(ce.hi),
+                                  ce.hi, ce.lo);
+        vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false);
+    } else {
+        trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
+                                    PCI_FUNC(vtd_as->devfn));
+    }
+
+    return;
+}
+
 /* Do the initialization. It will also be called when reset, so pay
  * attention when adding new initialization stuff.
  */
@@ -2416,6 +2817,7 @@ static void vtd_init(IntelIOMMUState *s)
 
     s->iommu_ops.translate = vtd_iommu_translate;
     s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed;
+    s->iommu_ops.replay = vtd_iommu_replay;
     s->root = 0;
     s->root_extended = false;
     s->dmar_enabled = false;
@@ -2511,6 +2913,11 @@ static void vtd_reset(DeviceState *dev)
 
     VTD_DPRINTF(GENERAL, "");
     vtd_init(s);
+
+    /*
+     * When device reset, throw away all mappings and external caches
+     */
+    vtd_address_space_unmap_all(s);
 }
 
 static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
@@ -2574,6 +2981,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    QLIST_INIT(&s->notifiers_list);
     memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 41041219ba..29d67075f4 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -197,6 +197,7 @@
 #define VTD_DOMAIN_ID_MASK          ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
 #define VTD_CAP_ND                  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
 #define VTD_MGAW                    39  /* Maximum Guest Address Width */
+#define VTD_ADDRESS_SIZE            (1ULL << VTD_MGAW)
 #define VTD_CAP_MGAW                (((VTD_MGAW - 1) & 0x3fULL) << 16)
 #define VTD_MAMV                    18ULL
 #define VTD_CAP_MAMV                (VTD_MAMV << 48)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index d24388e05f..f3b372a18f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1104,9 +1104,7 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp)
     object_property_set_bool(cpu, true, "realized", &local_err);
 
     object_unref(cpu);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
+    error_propagate(errp, local_err);
 }
 
 void pc_hot_add_cpu(const int64_t id, Error **errp)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 88ad5e4c43..04a6980800 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -4,7 +4,6 @@
 x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32
 
 # hw/i386/intel_iommu.c
-vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
 vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64
 vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64
 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16
@@ -30,6 +29,15 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32
 vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
 vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
 vtd_fault_disabled(void) "Fault processing disabled for context entry"
+vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
+vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
+vtd_page_walk_one(uint32_t level, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "detected page level 0x%"PRIx32" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
+vtd_page_walk_skip_read(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to unable to read"
+vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to perm empty"
+vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" - 0x%"PRIx64" due to rsrv set"
+vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)"
+vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size) "Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
 
 # hw/i386/amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
@@ -37,6 +45,7 @@ amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint
 amdvi_completion_wait_fail(uint64_t addr) "error: fail to write at address 0x%"PRIx64
 amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64
 amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64
+amdvi_mmio_read_invalid(int max, uint64_t addr, unsigned size) "error: addr outside region (max 0x%x): read addr 0x%" PRIx64 ", size %u"
 amdvi_command_error(uint64_t status) "error: Executing commands with command buffer disabled 0x%"PRIx64
 amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: fail to access memory at 0x%"PRIx64" + 0x%"PRIx32
 amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64
diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c
index b678ee9f20..0e42f0d02c 100644
--- a/hw/input/virtio-input.c
+++ b/hw/input/virtio-input.c
@@ -22,7 +22,6 @@
 void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event)
 {
     VirtQueueElement *elem;
-    unsigned have, need;
     int i, len;
 
     if (!vinput->active) {
@@ -32,10 +31,10 @@ void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event)
     /* queue up events ... */
     if (vinput->qindex == vinput->qsize) {
         vinput->qsize++;
-        vinput->queue = realloc(vinput->queue, vinput->qsize *
-                                sizeof(virtio_input_event));
+        vinput->queue = g_realloc(vinput->queue, vinput->qsize *
+                                  sizeof(vinput->queue[0]));
     }
-    vinput->queue[vinput->qindex++] = *event;
+    vinput->queue[vinput->qindex++].event = *event;
 
     /* ... until we see a report sync ... */
     if (event->type != cpu_to_le16(EV_SYN) ||
@@ -44,24 +43,24 @@ void virtio_input_send(VirtIOInput *vinput, virtio_input_event *event)
     }
 
     /* ... then check available space ... */
-    need = sizeof(virtio_input_event) * vinput->qindex;
-    virtqueue_get_avail_bytes(vinput->evt, &have, NULL, need, 0);
-    if (have < need) {
-        vinput->qindex = 0;
-        trace_virtio_input_queue_full();
-        return;
-    }
-
-    /* ... and finally pass them to the guest */
     for (i = 0; i < vinput->qindex; i++) {
         elem = virtqueue_pop(vinput->evt, sizeof(VirtQueueElement));
         if (!elem) {
-            /* should not happen, we've checked for space beforehand */
-            fprintf(stderr, "%s: Huh?  No vq elem available ...\n", __func__);
+            while (--i >= 0) {
+                virtqueue_unpop(vinput->evt, vinput->queue[i].elem, 0);
+            }
+            vinput->qindex = 0;
+            trace_virtio_input_queue_full();
             return;
         }
+        vinput->queue[i].elem = elem;
+    }
+
+    /* ... and finally pass them to the guest */
+    for (i = 0; i < vinput->qindex; i++) {
+        elem = vinput->queue[i].elem;
         len = iov_from_buf(elem->in_sg, elem->in_num,
-                           0, vinput->queue+i, sizeof(virtio_input_event));
+                           0, &vinput->queue[i].event, sizeof(virtio_input_event));
         virtqueue_push(vinput->evt, elem, len);
         g_free(elem);
     }
@@ -272,6 +271,8 @@ static void virtio_input_finalize(Object *obj)
         QTAILQ_REMOVE(&vinput->cfg_list, cfg, node);
         g_free(cfg);
     }
+
+    g_free(vinput->queue);
 }
 static void virtio_input_device_unrealize(DeviceState *dev, Error **errp)
 {
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 7a6e771ed1..c3829e31b5 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -387,25 +387,6 @@ static bool apic_common_sipi_needed(void *opaque)
     return s->wait_for_sipi != 0;
 }
 
-static bool apic_irq_delivered_needed(void *opaque)
-{
-    APICCommonState *s = APIC_COMMON(opaque);
-    return s->cpu == X86_CPU(first_cpu) && apic_irq_delivered != 0;
-}
-
-static void apic_irq_delivered_pre_save(void *opaque)
-{
-    APICCommonState *s = APIC_COMMON(opaque);
-    s->apic_irq_delivered = apic_irq_delivered;
-}
-
-static int apic_irq_delivered_post_load(void *opaque, int version_id)
-{
-    APICCommonState *s = APIC_COMMON(opaque);
-    apic_irq_delivered = s->apic_irq_delivered;
-    return 0;
-}
-
 static const VMStateDescription vmstate_apic_common_sipi = {
     .name = "apic_sipi",
     .version_id = 1,
@@ -418,19 +399,6 @@ static const VMStateDescription vmstate_apic_common_sipi = {
     }
 };
 
-static const VMStateDescription vmstate_apic_irq_delivered = {
-    .name = "apic_irq_delivered",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .needed = apic_irq_delivered_needed,
-    .pre_save = apic_irq_delivered_pre_save,
-    .post_load = apic_irq_delivered_post_load,
-    .fields = (VMStateField[]) {
-        VMSTATE_INT32(apic_irq_delivered, APICCommonState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 static const VMStateDescription vmstate_apic_common = {
     .name = "apic",
     .version_id = 3,
@@ -465,7 +433,6 @@ static const VMStateDescription vmstate_apic_common = {
     },
     .subsections = (const VMStateDescription*[]) {
         &vmstate_apic_common_sipi,
-        &vmstate_apic_irq_delivered,
         NULL
     }
 };
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 81f0403117..19aab56072 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -614,12 +614,6 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     s = c->gic;
     cpu = ARM_CPU(c->cpu);
 
-    /* Initialize to actual HW supported configuration */
-    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
-                      KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
-                      &c->icc_ctlr_el1[GICV3_NS], false);
-
-    c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
     c->icc_pmr_el1 = 0;
     c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
     c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
@@ -628,6 +622,17 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
     c->icc_sre_el1 = 0x7;
     memset(c->icc_apr, 0, sizeof(c->icc_apr));
     memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+
+    if (s->migration_blocker) {
+        return;
+    }
+
+    /* Initialize to actual HW supported configuration */
+    kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+                      KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+                      &c->icc_ctlr_el1[GICV3_NS], false);
+
+    c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
 }
 
 static void kvm_arm_gicv3_reset(DeviceState *dev)
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index bef4caf980..711c11454f 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -21,11 +21,14 @@
 
 S390FLICState *s390_get_flic(void)
 {
-    S390FLICState *fs;
+    static S390FLICState *fs;
 
-    fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL));
     if (!fs) {
-        fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC, NULL));
+        fs = S390_FLIC_COMMON(object_resolve_path(TYPE_KVM_S390_FLIC, NULL));
+        if (!fs) {
+            fs = S390_FLIC_COMMON(object_resolve_path(TYPE_QEMU_S390_FLIC,
+                                                      NULL));
+        }
     }
     return fs;
 }
diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c
index 1c69cb33f8..2fcc3d2e7c 100644
--- a/hw/ipmi/isa_ipmi_bt.c
+++ b/hw/ipmi/isa_ipmi_bt.c
@@ -37,40 +37,30 @@
 #define IPMI_BT_HBUSY_BIT          6
 #define IPMI_BT_BBUSY_BIT          7
 
-#define IPMI_BT_CLR_WR_MASK        (1 << IPMI_BT_CLR_WR_BIT)
 #define IPMI_BT_GET_CLR_WR(d)      (((d) >> IPMI_BT_CLR_WR_BIT) & 0x1)
-#define IPMI_BT_SET_CLR_WR(d, v)   (d) = (((d) & ~IPMI_BT_CLR_WR_MASK) | \
-                                       (((v & 1) << IPMI_BT_CLR_WR_BIT)))
 
-#define IPMI_BT_CLR_RD_MASK        (1 << IPMI_BT_CLR_RD_BIT)
 #define IPMI_BT_GET_CLR_RD(d)      (((d) >> IPMI_BT_CLR_RD_BIT) & 0x1)
-#define IPMI_BT_SET_CLR_RD(d, v)   (d) = (((d) & ~IPMI_BT_CLR_RD_MASK) | \
-                                       (((v & 1) << IPMI_BT_CLR_RD_BIT)))
 
-#define IPMI_BT_H2B_ATN_MASK       (1 << IPMI_BT_H2B_ATN_BIT)
 #define IPMI_BT_GET_H2B_ATN(d)     (((d) >> IPMI_BT_H2B_ATN_BIT) & 0x1)
-#define IPMI_BT_SET_H2B_ATN(d, v)  (d) = (((d) & ~IPMI_BT_H2B_ATN_MASK) | \
-                                        (((v & 1) << IPMI_BT_H2B_ATN_BIT)))
 
 #define IPMI_BT_B2H_ATN_MASK       (1 << IPMI_BT_B2H_ATN_BIT)
 #define IPMI_BT_GET_B2H_ATN(d)     (((d) >> IPMI_BT_B2H_ATN_BIT) & 0x1)
-#define IPMI_BT_SET_B2H_ATN(d, v)  (d) = (((d) & ~IPMI_BT_B2H_ATN_MASK) | \
-                                        (((v & 1) << IPMI_BT_B2H_ATN_BIT)))
+#define IPMI_BT_SET_B2H_ATN(d, v)  ((d) = (((d) & ~IPMI_BT_B2H_ATN_MASK) | \
+                                        (((v) & 1) << IPMI_BT_B2H_ATN_BIT)))
 
 #define IPMI_BT_SMS_ATN_MASK       (1 << IPMI_BT_SMS_ATN_BIT)
 #define IPMI_BT_GET_SMS_ATN(d)     (((d) >> IPMI_BT_SMS_ATN_BIT) & 0x1)
-#define IPMI_BT_SET_SMS_ATN(d, v)  (d) = (((d) & ~IPMI_BT_SMS_ATN_MASK) | \
-                                        (((v & 1) << IPMI_BT_SMS_ATN_BIT)))
+#define IPMI_BT_SET_SMS_ATN(d, v)  ((d) = (((d) & ~IPMI_BT_SMS_ATN_MASK) | \
+                                        (((v) & 1) << IPMI_BT_SMS_ATN_BIT)))
 
 #define IPMI_BT_HBUSY_MASK         (1 << IPMI_BT_HBUSY_BIT)
 #define IPMI_BT_GET_HBUSY(d)       (((d) >> IPMI_BT_HBUSY_BIT) & 0x1)
-#define IPMI_BT_SET_HBUSY(d, v)    (d) = (((d) & ~IPMI_BT_HBUSY_MASK) | \
-                                       (((v & 1) << IPMI_BT_HBUSY_BIT)))
+#define IPMI_BT_SET_HBUSY(d, v)    ((d) = (((d) & ~IPMI_BT_HBUSY_MASK) | \
+                                       (((v) & 1) << IPMI_BT_HBUSY_BIT)))
 
 #define IPMI_BT_BBUSY_MASK         (1 << IPMI_BT_BBUSY_BIT)
-#define IPMI_BT_GET_BBUSY(d)       (((d) >> IPMI_BT_BBUSY_BIT) & 0x1)
-#define IPMI_BT_SET_BBUSY(d, v)    (d) = (((d) & ~IPMI_BT_BBUSY_MASK) | \
-                                       (((v & 1) << IPMI_BT_BBUSY_BIT)))
+#define IPMI_BT_SET_BBUSY(d, v)    ((d) = (((d) & ~IPMI_BT_BBUSY_MASK) | \
+                                       (((v) & 1) << IPMI_BT_BBUSY_BIT)))
 
 
 /* Mask register */
@@ -79,13 +69,13 @@
 
 #define IPMI_BT_B2H_IRQ_EN_MASK      (1 << IPMI_BT_B2H_IRQ_EN_BIT)
 #define IPMI_BT_GET_B2H_IRQ_EN(d)    (((d) >> IPMI_BT_B2H_IRQ_EN_BIT) & 0x1)
-#define IPMI_BT_SET_B2H_IRQ_EN(d, v) (d) = (((d) & ~IPMI_BT_B2H_IRQ_EN_MASK) | \
-                                        (((v & 1) << IPMI_BT_B2H_IRQ_EN_BIT)))
+#define IPMI_BT_SET_B2H_IRQ_EN(d, v) ((d) = (((d) & ~IPMI_BT_B2H_IRQ_EN_MASK) |\
+                                        (((v) & 1) << IPMI_BT_B2H_IRQ_EN_BIT)))
 
 #define IPMI_BT_B2H_IRQ_MASK         (1 << IPMI_BT_B2H_IRQ_BIT)
 #define IPMI_BT_GET_B2H_IRQ(d)       (((d) >> IPMI_BT_B2H_IRQ_BIT) & 0x1)
-#define IPMI_BT_SET_B2H_IRQ(d, v)    (d) = (((d) & ~IPMI_BT_B2H_IRQ_MASK) | \
-                                        (((v & 1) << IPMI_BT_B2H_IRQ_BIT)))
+#define IPMI_BT_SET_B2H_IRQ(d, v)    ((d) = (((d) & ~IPMI_BT_B2H_IRQ_MASK) | \
+                                        (((v) & 1) << IPMI_BT_B2H_IRQ_BIT)))
 
 typedef struct IPMIBT {
     IPMIBmc *bmc;
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 59930dd9d0..a0866c3856 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -312,11 +312,6 @@ void ich9_generate_smi(void)
     cpu_interrupt(first_cpu, CPU_INTERRUPT_SMI);
 }
 
-void ich9_generate_nmi(void)
-{
-    cpu_interrupt(first_cpu, CPU_INTERRUPT_NMI);
-}
-
 static int ich9_lpc_sci_irq(ICH9LPCState *lpc)
 {
     switch (lpc->d.config[ICH9_LPC_ACPI_CTRL] &
diff --git a/hw/misc/exynos4210_pmu.c b/hw/misc/exynos4210_pmu.c
index e30dbc7d3d..63a8ccd355 100644
--- a/hw/misc/exynos4210_pmu.c
+++ b/hw/misc/exynos4210_pmu.c
@@ -401,8 +401,8 @@ static uint64_t exynos4210_pmu_read(void *opaque, hwaddr offset,
                                     unsigned size)
 {
     Exynos4210PmuState *s = (Exynos4210PmuState *)opaque;
-    unsigned i;
     const Exynos4210PmuReg *reg_p = exynos4210_pmu_regs;
+    unsigned int i;
 
     for (i = 0; i < PMU_NUM_OF_REGISTERS; i++) {
         if (reg_p->offset == offset) {
@@ -420,8 +420,8 @@ static void exynos4210_pmu_write(void *opaque, hwaddr offset,
                                  uint64_t val, unsigned size)
 {
     Exynos4210PmuState *s = (Exynos4210PmuState *)opaque;
-    unsigned i;
     const Exynos4210PmuReg *reg_p = exynos4210_pmu_regs;
+    unsigned int i;
 
     for (i = 0; i < PMU_NUM_OF_REGISTERS; i++) {
         if (reg_p->offset == offset) {
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index 6a95d92d37..5ddaffe63a 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -26,6 +26,7 @@ common-obj-$(CONFIG_IMX_FEC) += imx_fec.o
 common-obj-$(CONFIG_CADENCE) += cadence_gem.o
 common-obj-$(CONFIG_STELLARIS_ENET) += stellaris_enet.o
 common-obj-$(CONFIG_LANCE) += lance.o
+common-obj-$(CONFIG_FTGMAC100) += ftgmac100.o
 
 obj-$(CONFIG_ETRAXFS) += etraxfs_eth.o
 obj-$(CONFIG_COLDFIRE) += mcf_fec.o
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index d4de8ad9f1..3943187572 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -300,6 +300,8 @@
 #define DESC_1_RX_SOF 0x00004000
 #define DESC_1_RX_EOF 0x00008000
 
+#define GEM_MODID_VALUE 0x00020118
+
 static inline unsigned tx_desc_get_buffer(unsigned *desc)
 {
     return desc[0];
@@ -481,14 +483,17 @@ static int gem_can_receive(NetClientState *nc)
     }
 
     for (i = 0; i < s->num_priority_queues; i++) {
-        if (rx_desc_get_ownership(s->rx_desc[i]) == 1) {
-            if (s->can_rx_state != 2) {
-                s->can_rx_state = 2;
-                DB_PRINT("can't receive - busy buffer descriptor (q%d) 0x%x\n",
-                         i, s->rx_desc_addr[i]);
-             }
-            return 0;
+        if (rx_desc_get_ownership(s->rx_desc[i]) != 1) {
+            break;
+        }
+    };
+
+    if (i == s->num_priority_queues) {
+        if (s->can_rx_state != 2) {
+            s->can_rx_state = 2;
+            DB_PRINT("can't receive - all the buffer descriptors are busy\n");
         }
+        return 0;
     }
 
     if (s->can_rx_state != 0) {
@@ -506,7 +511,18 @@ static void gem_update_int_status(CadenceGEMState *s)
 {
     int i;
 
-    if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) {
+    if (!s->regs[GEM_ISR]) {
+        /* ISR isn't set, clear all the interrupts */
+        for (i = 0; i < s->num_priority_queues; ++i) {
+            qemu_set_irq(s->irq[i], 0);
+        }
+        return;
+    }
+
+    /* If we get here we know s->regs[GEM_ISR] is set, so we don't need to
+     * check it again.
+     */
+    if (s->num_priority_queues == 1) {
         /* No priority queues, just trigger the interrupt */
         DB_PRINT("asserting int.\n");
         qemu_set_irq(s->irq[0], 1);
@@ -790,8 +806,8 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q)
 {
     DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr[q]);
     /* read current descriptor */
-    cpu_physical_memory_read(s->rx_desc_addr[0],
-                             (uint8_t *)s->rx_desc[0], sizeof(s->rx_desc[0]));
+    cpu_physical_memory_read(s->rx_desc_addr[q],
+                             (uint8_t *)s->rx_desc[q], sizeof(s->rx_desc[q]));
 
     /* Descriptor owned by software ? */
     if (rx_desc_get_ownership(s->rx_desc[q]) == 1) {
@@ -1209,7 +1225,7 @@ static void gem_reset(DeviceState *d)
     s->regs[GEM_TXPAUSE] = 0x0000ffff;
     s->regs[GEM_TXPARTIALSF] = 0x000003ff;
     s->regs[GEM_RXPARTIALSF] = 0x000003ff;
-    s->regs[GEM_MODID] = 0x00020118;
+    s->regs[GEM_MODID] = s->revision;
     s->regs[GEM_DESCONF] = 0x02500111;
     s->regs[GEM_DESCONF2] = 0x2ab13fff;
     s->regs[GEM_DESCONF5] = 0x002f2145;
@@ -1271,7 +1287,6 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size)
 {
     CadenceGEMState *s;
     uint32_t retval;
-    int i;
     s = (CadenceGEMState *)opaque;
 
     offset >>= 2;
@@ -1282,9 +1297,7 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size)
     switch (offset) {
     case GEM_ISR:
         DB_PRINT("lowering irqs on ISR read\n");
-        for (i = 0; i < s->num_priority_queues; ++i) {
-            qemu_set_irq(s->irq[i], 0);
-        }
+        /* The interrupts get updated at the end of the function. */
         break;
     case GEM_PHYMNTNC:
         if (retval & GEM_PHYMNTNC_OP_R) {
@@ -1508,6 +1521,8 @@ static const VMStateDescription vmstate_cadence_gem = {
 
 static Property gem_properties[] = {
     DEFINE_NIC_PROPERTIES(CadenceGEMState, conf),
+    DEFINE_PROP_UINT32("revision", CadenceGEMState, revision,
+                       GEM_MODID_VALUE),
     DEFINE_PROP_UINT8("num-priority-queues", CadenceGEMState,
                       num_priority_queues, 1),
     DEFINE_PROP_UINT8("num-type1-screeners", CadenceGEMState,
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index 93249497f4..f2e5072d27 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -40,7 +40,7 @@
 
 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
-#define E1000_DEBUG
+/* #define E1000_DEBUG */
 
 #ifdef E1000_DEBUG
 enum {
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
new file mode 100644
index 0000000000..3c36ab9cec
--- /dev/null
+++ b/hw/net/ftgmac100.c
@@ -0,0 +1,1016 @@
+/*
+ * Faraday FTGMAC100 Gigabit Ethernet
+ *
+ * Copyright (C) 2016-2017, IBM Corporation.
+ *
+ * Based on Coldfire Fast Ethernet Controller emulation.
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/net/ftgmac100.h"
+#include "sysemu/dma.h"
+#include "qemu/log.h"
+#include "net/checksum.h"
+#include "net/eth.h"
+#include "hw/net/mii.h"
+
+/* For crc32 */
+#include <zlib.h>
+
+/*
+ * FTGMAC100 registers
+ */
+#define FTGMAC100_ISR             0x00
+#define FTGMAC100_IER             0x04
+#define FTGMAC100_MAC_MADR        0x08
+#define FTGMAC100_MAC_LADR        0x0c
+#define FTGMAC100_MATH0           0x10
+#define FTGMAC100_MATH1           0x14
+#define FTGMAC100_NPTXPD          0x18
+#define FTGMAC100_RXPD            0x1C
+#define FTGMAC100_NPTXR_BADR      0x20
+#define FTGMAC100_RXR_BADR        0x24
+#define FTGMAC100_HPTXPD          0x28
+#define FTGMAC100_HPTXR_BADR      0x2c
+#define FTGMAC100_ITC             0x30
+#define FTGMAC100_APTC            0x34
+#define FTGMAC100_DBLAC           0x38
+#define FTGMAC100_REVR            0x40
+#define FTGMAC100_FEAR1           0x44
+#define FTGMAC100_RBSR            0x4c
+#define FTGMAC100_TPAFCR          0x48
+
+#define FTGMAC100_MACCR           0x50
+#define FTGMAC100_MACSR           0x54
+#define FTGMAC100_PHYCR           0x60
+#define FTGMAC100_PHYDATA         0x64
+#define FTGMAC100_FCR             0x68
+
+/*
+ * Interrupt status register & interrupt enable register
+ */
+#define FTGMAC100_INT_RPKT_BUF    (1 << 0)
+#define FTGMAC100_INT_RPKT_FIFO   (1 << 1)
+#define FTGMAC100_INT_NO_RXBUF    (1 << 2)
+#define FTGMAC100_INT_RPKT_LOST   (1 << 3)
+#define FTGMAC100_INT_XPKT_ETH    (1 << 4)
+#define FTGMAC100_INT_XPKT_FIFO   (1 << 5)
+#define FTGMAC100_INT_NO_NPTXBUF  (1 << 6)
+#define FTGMAC100_INT_XPKT_LOST   (1 << 7)
+#define FTGMAC100_INT_AHB_ERR     (1 << 8)
+#define FTGMAC100_INT_PHYSTS_CHG  (1 << 9)
+#define FTGMAC100_INT_NO_HPTXBUF  (1 << 10)
+
+/*
+ * Automatic polling timer control register
+ */
+#define FTGMAC100_APTC_RXPOLL_CNT(x)        ((x) & 0xf)
+#define FTGMAC100_APTC_RXPOLL_TIME_SEL      (1 << 4)
+#define FTGMAC100_APTC_TXPOLL_CNT(x)        (((x) >> 8) & 0xf)
+#define FTGMAC100_APTC_TXPOLL_TIME_SEL      (1 << 12)
+
+/*
+ * PHY control register
+ */
+#define FTGMAC100_PHYCR_MIIRD               (1 << 26)
+#define FTGMAC100_PHYCR_MIIWR               (1 << 27)
+
+#define FTGMAC100_PHYCR_DEV(x)              (((x) >> 16) & 0x1f)
+#define FTGMAC100_PHYCR_REG(x)              (((x) >> 21) & 0x1f)
+
+/*
+ * PHY data register
+ */
+#define FTGMAC100_PHYDATA_MIIWDATA(x)       ((x) & 0xffff)
+#define FTGMAC100_PHYDATA_MIIRDATA(x)       (((x) >> 16) & 0xffff)
+
+/*
+ * Feature Register
+ */
+#define FTGMAC100_REVR_NEW_MDIO_INTERFACE   (1 << 31)
+
+/*
+ * MAC control register
+ */
+#define FTGMAC100_MACCR_TXDMA_EN         (1 << 0)
+#define FTGMAC100_MACCR_RXDMA_EN         (1 << 1)
+#define FTGMAC100_MACCR_TXMAC_EN         (1 << 2)
+#define FTGMAC100_MACCR_RXMAC_EN         (1 << 3)
+#define FTGMAC100_MACCR_RM_VLAN          (1 << 4)
+#define FTGMAC100_MACCR_HPTXR_EN         (1 << 5)
+#define FTGMAC100_MACCR_LOOP_EN          (1 << 6)
+#define FTGMAC100_MACCR_ENRX_IN_HALFTX   (1 << 7)
+#define FTGMAC100_MACCR_FULLDUP          (1 << 8)
+#define FTGMAC100_MACCR_GIGA_MODE        (1 << 9)
+#define FTGMAC100_MACCR_CRC_APD          (1 << 10) /* not needed */
+#define FTGMAC100_MACCR_RX_RUNT          (1 << 12)
+#define FTGMAC100_MACCR_JUMBO_LF         (1 << 13)
+#define FTGMAC100_MACCR_RX_ALL           (1 << 14)
+#define FTGMAC100_MACCR_HT_MULTI_EN      (1 << 15)
+#define FTGMAC100_MACCR_RX_MULTIPKT      (1 << 16)
+#define FTGMAC100_MACCR_RX_BROADPKT      (1 << 17)
+#define FTGMAC100_MACCR_DISCARD_CRCERR   (1 << 18)
+#define FTGMAC100_MACCR_FAST_MODE        (1 << 19)
+#define FTGMAC100_MACCR_SW_RST           (1 << 31)
+
+/*
+ * Transmit descriptor
+ */
+#define FTGMAC100_TXDES0_TXBUF_SIZE(x)   ((x) & 0x3fff)
+#define FTGMAC100_TXDES0_EDOTR           (1 << 15)
+#define FTGMAC100_TXDES0_CRC_ERR         (1 << 19)
+#define FTGMAC100_TXDES0_LTS             (1 << 28)
+#define FTGMAC100_TXDES0_FTS             (1 << 29)
+#define FTGMAC100_TXDES0_EDOTR_ASPEED    (1 << 30)
+#define FTGMAC100_TXDES0_TXDMA_OWN       (1 << 31)
+
+#define FTGMAC100_TXDES1_VLANTAG_CI(x)   ((x) & 0xffff)
+#define FTGMAC100_TXDES1_INS_VLANTAG     (1 << 16)
+#define FTGMAC100_TXDES1_TCP_CHKSUM      (1 << 17)
+#define FTGMAC100_TXDES1_UDP_CHKSUM      (1 << 18)
+#define FTGMAC100_TXDES1_IP_CHKSUM       (1 << 19)
+#define FTGMAC100_TXDES1_LLC             (1 << 22)
+#define FTGMAC100_TXDES1_TX2FIC          (1 << 30)
+#define FTGMAC100_TXDES1_TXIC            (1 << 31)
+
+/*
+ * Receive descriptor
+ */
+#define FTGMAC100_RXDES0_VDBC            0x3fff
+#define FTGMAC100_RXDES0_EDORR           (1 << 15)
+#define FTGMAC100_RXDES0_MULTICAST       (1 << 16)
+#define FTGMAC100_RXDES0_BROADCAST       (1 << 17)
+#define FTGMAC100_RXDES0_RX_ERR          (1 << 18)
+#define FTGMAC100_RXDES0_CRC_ERR         (1 << 19)
+#define FTGMAC100_RXDES0_FTL             (1 << 20)
+#define FTGMAC100_RXDES0_RUNT            (1 << 21)
+#define FTGMAC100_RXDES0_RX_ODD_NB       (1 << 22)
+#define FTGMAC100_RXDES0_FIFO_FULL       (1 << 23)
+#define FTGMAC100_RXDES0_PAUSE_OPCODE    (1 << 24)
+#define FTGMAC100_RXDES0_PAUSE_FRAME     (1 << 25)
+#define FTGMAC100_RXDES0_LRS             (1 << 28)
+#define FTGMAC100_RXDES0_FRS             (1 << 29)
+#define FTGMAC100_RXDES0_EDORR_ASPEED    (1 << 30)
+#define FTGMAC100_RXDES0_RXPKT_RDY       (1 << 31)
+
+#define FTGMAC100_RXDES1_VLANTAG_CI      0xffff
+#define FTGMAC100_RXDES1_PROT_MASK       (0x3 << 20)
+#define FTGMAC100_RXDES1_PROT_NONIP      (0x0 << 20)
+#define FTGMAC100_RXDES1_PROT_IP         (0x1 << 20)
+#define FTGMAC100_RXDES1_PROT_TCPIP      (0x2 << 20)
+#define FTGMAC100_RXDES1_PROT_UDPIP      (0x3 << 20)
+#define FTGMAC100_RXDES1_LLC             (1 << 22)
+#define FTGMAC100_RXDES1_DF              (1 << 23)
+#define FTGMAC100_RXDES1_VLANTAG_AVAIL   (1 << 24)
+#define FTGMAC100_RXDES1_TCP_CHKSUM_ERR  (1 << 25)
+#define FTGMAC100_RXDES1_UDP_CHKSUM_ERR  (1 << 26)
+#define FTGMAC100_RXDES1_IP_CHKSUM_ERR   (1 << 27)
+
+/*
+ * Receive and transmit Buffer Descriptor
+ */
+typedef struct {
+    uint32_t        des0;
+    uint32_t        des1;
+    uint32_t        des2;        /* not used by HW */
+    uint32_t        des3;
+} FTGMAC100Desc;
+
+/*
+ * Specific RTL8211E MII Registers
+ */
+#define RTL8211E_MII_PHYCR        16 /* PHY Specific Control */
+#define RTL8211E_MII_PHYSR        17 /* PHY Specific Status */
+#define RTL8211E_MII_INER         18 /* Interrupt Enable */
+#define RTL8211E_MII_INSR         19 /* Interrupt Status */
+#define RTL8211E_MII_RXERC        24 /* Receive Error Counter */
+#define RTL8211E_MII_LDPSR        27 /* Link Down Power Saving */
+#define RTL8211E_MII_EPAGSR       30 /* Extension Page Select */
+#define RTL8211E_MII_PAGSEL       31 /* Page Select */
+
+/*
+ * RTL8211E Interrupt Status
+ */
+#define PHY_INT_AUTONEG_ERROR       (1 << 15)
+#define PHY_INT_PAGE_RECV           (1 << 12)
+#define PHY_INT_AUTONEG_COMPLETE    (1 << 11)
+#define PHY_INT_LINK_STATUS         (1 << 10)
+#define PHY_INT_ERROR               (1 << 9)
+#define PHY_INT_DOWN                (1 << 8)
+#define PHY_INT_JABBER              (1 << 0)
+
+/*
+ * Max frame size for the receiving buffer
+ */
+#define FTGMAC100_MAX_FRAME_SIZE    10240
+
+/* Limits depending on the type of the frame
+ *
+ *   9216 for Jumbo frames (+ 4 for VLAN)
+ *   1518 for other frames (+ 4 for VLAN)
+ */
+static int ftgmac100_max_frame_size(FTGMAC100State *s)
+{
+    return (s->maccr & FTGMAC100_MACCR_JUMBO_LF ? 9216 : 1518) + 4;
+}
+
+static void ftgmac100_update_irq(FTGMAC100State *s)
+{
+    qemu_set_irq(s->irq, s->isr & s->ier);
+}
+
+/*
+ * The MII phy could raise a GPIO to the processor which in turn
+ * could be handled as an interrpt by the OS.
+ * For now we don't handle any GPIO/interrupt line, so the OS will
+ * have to poll for the PHY status.
+ */
+static void phy_update_irq(FTGMAC100State *s)
+{
+    ftgmac100_update_irq(s);
+}
+
+static void phy_update_link(FTGMAC100State *s)
+{
+    /* Autonegotiation status mirrors link status.  */
+    if (qemu_get_queue(s->nic)->link_down) {
+        s->phy_status &= ~(MII_BMSR_LINK_ST | MII_BMSR_AN_COMP);
+        s->phy_int |= PHY_INT_DOWN;
+    } else {
+        s->phy_status |= (MII_BMSR_LINK_ST | MII_BMSR_AN_COMP);
+        s->phy_int |= PHY_INT_AUTONEG_COMPLETE;
+    }
+    phy_update_irq(s);
+}
+
+static void ftgmac100_set_link(NetClientState *nc)
+{
+    phy_update_link(FTGMAC100(qemu_get_nic_opaque(nc)));
+}
+
+static void phy_reset(FTGMAC100State *s)
+{
+    s->phy_status = (MII_BMSR_100TX_FD | MII_BMSR_100TX_HD | MII_BMSR_10T_FD |
+                     MII_BMSR_10T_HD | MII_BMSR_EXTSTAT | MII_BMSR_MFPS |
+                     MII_BMSR_AN_COMP | MII_BMSR_AUTONEG | MII_BMSR_LINK_ST |
+                     MII_BMSR_EXTCAP);
+    s->phy_control = (MII_BMCR_AUTOEN | MII_BMCR_FD | MII_BMCR_SPEED1000);
+    s->phy_advertise = (MII_ANAR_PAUSE_ASYM | MII_ANAR_PAUSE | MII_ANAR_TXFD |
+                        MII_ANAR_TX | MII_ANAR_10FD | MII_ANAR_10 |
+                        MII_ANAR_CSMACD);
+    s->phy_int_mask = 0;
+    s->phy_int = 0;
+}
+
+static uint32_t do_phy_read(FTGMAC100State *s, int reg)
+{
+    uint32_t val;
+
+    switch (reg) {
+    case MII_BMCR: /* Basic Control */
+        val = s->phy_control;
+        break;
+    case MII_BMSR: /* Basic Status */
+        val = s->phy_status;
+        break;
+    case MII_PHYID1: /* ID1 */
+        val = RTL8211E_PHYID1;
+        break;
+    case MII_PHYID2: /* ID2 */
+        val = RTL8211E_PHYID2;
+        break;
+    case MII_ANAR: /* Auto-neg advertisement */
+        val = s->phy_advertise;
+        break;
+    case MII_ANLPAR: /* Auto-neg Link Partner Ability */
+        val = (MII_ANLPAR_ACK | MII_ANLPAR_PAUSE | MII_ANLPAR_TXFD |
+               MII_ANLPAR_TX | MII_ANLPAR_10FD | MII_ANLPAR_10 |
+               MII_ANLPAR_CSMACD);
+        break;
+    case MII_ANER: /* Auto-neg Expansion */
+        val = MII_ANER_NWAY;
+        break;
+    case MII_CTRL1000: /* 1000BASE-T control  */
+        val = (MII_CTRL1000_HALF | MII_CTRL1000_FULL);
+        break;
+    case MII_STAT1000: /* 1000BASE-T status  */
+        val = MII_STAT1000_FULL;
+        break;
+    case RTL8211E_MII_INSR:  /* Interrupt status.  */
+        val = s->phy_int;
+        s->phy_int = 0;
+        phy_update_irq(s);
+        break;
+    case RTL8211E_MII_INER:  /* Interrupt enable */
+        val = s->phy_int_mask;
+        break;
+    case RTL8211E_MII_PHYCR:
+    case RTL8211E_MII_PHYSR:
+    case RTL8211E_MII_RXERC:
+    case RTL8211E_MII_LDPSR:
+    case RTL8211E_MII_EPAGSR:
+    case RTL8211E_MII_PAGSEL:
+        qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n",
+                      __func__, reg);
+        val = 0;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n",
+                      __func__, reg);
+        val = 0;
+        break;
+    }
+
+    return val;
+}
+
+#define MII_BMCR_MASK (MII_BMCR_LOOPBACK | MII_BMCR_SPEED100 |          \
+                       MII_BMCR_SPEED | MII_BMCR_AUTOEN | MII_BMCR_PDOWN | \
+                       MII_BMCR_FD | MII_BMCR_CTST)
+#define MII_ANAR_MASK 0x2d7f
+
+static void do_phy_write(FTGMAC100State *s, int reg, uint32_t val)
+{
+    switch (reg) {
+    case MII_BMCR:     /* Basic Control */
+        if (val & MII_BMCR_RESET) {
+            phy_reset(s);
+        } else {
+            s->phy_control = val & MII_BMCR_MASK;
+            /* Complete autonegotiation immediately.  */
+            if (val & MII_BMCR_AUTOEN) {
+                s->phy_status |= MII_BMSR_AN_COMP;
+            }
+        }
+        break;
+    case MII_ANAR:     /* Auto-neg advertisement */
+        s->phy_advertise = (val & MII_ANAR_MASK) | MII_ANAR_TX;
+        break;
+    case RTL8211E_MII_INER: /* Interrupt enable */
+        s->phy_int_mask = val & 0xff;
+        phy_update_irq(s);
+        break;
+    case RTL8211E_MII_PHYCR:
+    case RTL8211E_MII_PHYSR:
+    case RTL8211E_MII_RXERC:
+    case RTL8211E_MII_LDPSR:
+    case RTL8211E_MII_EPAGSR:
+    case RTL8211E_MII_PAGSEL:
+        qemu_log_mask(LOG_UNIMP, "%s: reg %d not implemented\n",
+                      __func__, reg);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset %d\n",
+                      __func__, reg);
+        break;
+    }
+}
+
+static int ftgmac100_read_bd(FTGMAC100Desc *bd, dma_addr_t addr)
+{
+    if (dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read descriptor @ 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        return -1;
+    }
+    bd->des0 = le32_to_cpu(bd->des0);
+    bd->des1 = le32_to_cpu(bd->des1);
+    bd->des2 = le32_to_cpu(bd->des2);
+    bd->des3 = le32_to_cpu(bd->des3);
+    return 0;
+}
+
+static int ftgmac100_write_bd(FTGMAC100Desc *bd, dma_addr_t addr)
+{
+    FTGMAC100Desc lebd;
+
+    lebd.des0 = cpu_to_le32(bd->des0);
+    lebd.des1 = cpu_to_le32(bd->des1);
+    lebd.des2 = cpu_to_le32(bd->des2);
+    lebd.des3 = cpu_to_le32(bd->des3);
+    if (dma_memory_write(&address_space_memory, addr, &lebd, sizeof(lebd))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to write descriptor @ 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        return -1;
+    }
+    return 0;
+}
+
+static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring,
+                            uint32_t tx_descriptor)
+{
+    int frame_size = 0;
+    uint8_t *ptr = s->frame;
+    uint32_t addr = tx_descriptor;
+    uint32_t flags = 0;
+    int max_frame_size = ftgmac100_max_frame_size(s);
+
+    while (1) {
+        FTGMAC100Desc bd;
+        int len;
+
+        if (ftgmac100_read_bd(&bd, addr) ||
+            ((bd.des0 & FTGMAC100_TXDES0_TXDMA_OWN) == 0)) {
+            /* Run out of descriptors to transmit.  */
+            s->isr |= FTGMAC100_INT_NO_NPTXBUF;
+            break;
+        }
+
+        /* record transmit flags as they are valid only on the first
+         * segment */
+        if (bd.des0 & FTGMAC100_TXDES0_FTS) {
+            flags = bd.des1;
+        }
+
+        len = bd.des0 & 0x3FFF;
+        if (frame_size + len > max_frame_size) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: frame too big : %d bytes\n",
+                          __func__, len);
+            len = max_frame_size - frame_size;
+        }
+
+        if (dma_memory_read(&address_space_memory, bd.des3, ptr, len)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to read packet @ 0x%x\n",
+                          __func__, bd.des3);
+            s->isr |= FTGMAC100_INT_NO_NPTXBUF;
+            break;
+        }
+
+        ptr += len;
+        frame_size += len;
+        if (bd.des0 & FTGMAC100_TXDES0_LTS) {
+            if (flags & FTGMAC100_TXDES1_IP_CHKSUM) {
+                net_checksum_calculate(s->frame, frame_size);
+            }
+            /* Last buffer in frame.  */
+            qemu_send_packet(qemu_get_queue(s->nic), s->frame, frame_size);
+            ptr = s->frame;
+            frame_size = 0;
+            if (flags & FTGMAC100_TXDES1_TXIC) {
+                s->isr |= FTGMAC100_INT_XPKT_ETH;
+            }
+        }
+
+        if (flags & FTGMAC100_TXDES1_TX2FIC) {
+            s->isr |= FTGMAC100_INT_XPKT_FIFO;
+        }
+        bd.des0 &= ~FTGMAC100_TXDES0_TXDMA_OWN;
+
+        /* Write back the modified descriptor.  */
+        ftgmac100_write_bd(&bd, addr);
+        /* Advance to the next descriptor.  */
+        if (bd.des0 & s->txdes0_edotr) {
+            addr = tx_ring;
+        } else {
+            addr += sizeof(FTGMAC100Desc);
+        }
+    }
+
+    s->tx_descriptor = addr;
+
+    ftgmac100_update_irq(s);
+}
+
+static int ftgmac100_can_receive(NetClientState *nc)
+{
+    FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
+    FTGMAC100Desc bd;
+
+    if ((s->maccr & (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN))
+         != (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) {
+        return 0;
+    }
+
+    if (ftgmac100_read_bd(&bd, s->rx_descriptor)) {
+        return 0;
+    }
+    return !(bd.des0 & FTGMAC100_RXDES0_RXPKT_RDY);
+}
+
+/*
+ * This is purely informative. The HW can poll the RW (and RX) ring
+ * buffers for available descriptors but we don't need to trigger a
+ * timer for that in qemu.
+ */
+static uint32_t ftgmac100_rxpoll(FTGMAC100State *s)
+{
+    /* Polling times :
+     *
+     * Speed      TIME_SEL=0    TIME_SEL=1
+     *
+     *    10         51.2 ms      819.2 ms
+     *   100         5.12 ms      81.92 ms
+     *  1000        1.024 ms     16.384 ms
+     */
+    static const int div[] = { 20, 200, 1000 };
+
+    uint32_t cnt = 1024 * FTGMAC100_APTC_RXPOLL_CNT(s->aptcr);
+    uint32_t speed = (s->maccr & FTGMAC100_MACCR_FAST_MODE) ? 1 : 0;
+    uint32_t period;
+
+    if (s->aptcr & FTGMAC100_APTC_RXPOLL_TIME_SEL) {
+        cnt <<= 4;
+    }
+
+    if (s->maccr & FTGMAC100_MACCR_GIGA_MODE) {
+        speed = 2;
+    }
+
+    period = cnt / div[speed];
+
+    return period;
+}
+
+static void ftgmac100_reset(DeviceState *d)
+{
+    FTGMAC100State *s = FTGMAC100(d);
+
+    /* Reset the FTGMAC100 */
+    s->isr = 0;
+    s->ier = 0;
+    s->rx_enabled = 0;
+    s->rx_ring = 0;
+    s->rbsr = 0x640;
+    s->rx_descriptor = 0;
+    s->tx_ring = 0;
+    s->tx_descriptor = 0;
+    s->math[0] = 0;
+    s->math[1] = 0;
+    s->itc = 0;
+    s->aptcr = 1;
+    s->dblac = 0x00022f00;
+    s->revr = 0;
+    s->fear1 = 0;
+    s->tpafcr = 0xf1;
+
+    s->maccr = 0;
+    s->phycr = 0;
+    s->phydata = 0;
+    s->fcr = 0x400;
+
+    /* and the PHY */
+    phy_reset(s);
+}
+
+static uint64_t ftgmac100_read(void *opaque, hwaddr addr, unsigned size)
+{
+    FTGMAC100State *s = FTGMAC100(opaque);
+
+    switch (addr & 0xff) {
+    case FTGMAC100_ISR:
+        return s->isr;
+    case FTGMAC100_IER:
+        return s->ier;
+    case FTGMAC100_MAC_MADR:
+        return (s->conf.macaddr.a[0] << 8)  | s->conf.macaddr.a[1];
+    case FTGMAC100_MAC_LADR:
+        return ((uint32_t) s->conf.macaddr.a[2] << 24) |
+            (s->conf.macaddr.a[3] << 16) | (s->conf.macaddr.a[4] << 8) |
+            s->conf.macaddr.a[5];
+    case FTGMAC100_MATH0:
+        return s->math[0];
+    case FTGMAC100_MATH1:
+        return s->math[1];
+    case FTGMAC100_ITC:
+        return s->itc;
+    case FTGMAC100_DBLAC:
+        return s->dblac;
+    case FTGMAC100_REVR:
+        return s->revr;
+    case FTGMAC100_FEAR1:
+        return s->fear1;
+    case FTGMAC100_TPAFCR:
+        return s->tpafcr;
+    case FTGMAC100_FCR:
+        return s->fcr;
+    case FTGMAC100_MACCR:
+        return s->maccr;
+    case FTGMAC100_PHYCR:
+        return s->phycr;
+    case FTGMAC100_PHYDATA:
+        return s->phydata;
+
+        /* We might want to support these one day */
+    case FTGMAC100_HPTXPD: /* High Priority Transmit Poll Demand */
+    case FTGMAC100_HPTXR_BADR: /* High Priority Transmit Ring Base Address */
+    case FTGMAC100_MACSR: /* MAC Status Register (MACSR) */
+        qemu_log_mask(LOG_UNIMP, "%s: read to unimplemented register 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        return 0;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        return 0;
+    }
+}
+
+static void ftgmac100_write(void *opaque, hwaddr addr,
+                          uint64_t value, unsigned size)
+{
+    FTGMAC100State *s = FTGMAC100(opaque);
+    int reg;
+
+    switch (addr & 0xff) {
+    case FTGMAC100_ISR: /* Interrupt status */
+        s->isr &= ~value;
+        break;
+    case FTGMAC100_IER: /* Interrupt control */
+        s->ier = value;
+        break;
+    case FTGMAC100_MAC_MADR: /* MAC */
+        s->conf.macaddr.a[0] = value >> 8;
+        s->conf.macaddr.a[1] = value;
+        break;
+    case FTGMAC100_MAC_LADR:
+        s->conf.macaddr.a[2] = value >> 24;
+        s->conf.macaddr.a[3] = value >> 16;
+        s->conf.macaddr.a[4] = value >> 8;
+        s->conf.macaddr.a[5] = value;
+        break;
+    case FTGMAC100_MATH0: /* Multicast Address Hash Table 0 */
+        s->math[0] = value;
+        break;
+    case FTGMAC100_MATH1: /* Multicast Address Hash Table 1 */
+        s->math[1] = value;
+        break;
+    case FTGMAC100_ITC: /* TODO: Interrupt Timer Control */
+        s->itc = value;
+        break;
+    case FTGMAC100_RXR_BADR: /* Ring buffer address */
+        s->rx_ring = value;
+        s->rx_descriptor = s->rx_ring;
+        break;
+
+    case FTGMAC100_RBSR: /* DMA buffer size */
+        s->rbsr = value;
+        break;
+
+    case FTGMAC100_NPTXR_BADR: /* Transmit buffer address */
+        s->tx_ring = value;
+        s->tx_descriptor = s->tx_ring;
+        break;
+
+    case FTGMAC100_NPTXPD: /* Trigger transmit */
+        if ((s->maccr & (FTGMAC100_MACCR_TXDMA_EN | FTGMAC100_MACCR_TXMAC_EN))
+            == (FTGMAC100_MACCR_TXDMA_EN | FTGMAC100_MACCR_TXMAC_EN)) {
+            /* TODO: high priority tx ring */
+            ftgmac100_do_tx(s, s->tx_ring, s->tx_descriptor);
+        }
+        if (ftgmac100_can_receive(qemu_get_queue(s->nic))) {
+            qemu_flush_queued_packets(qemu_get_queue(s->nic));
+        }
+        break;
+
+    case FTGMAC100_RXPD: /* Receive Poll Demand Register */
+        if (ftgmac100_can_receive(qemu_get_queue(s->nic))) {
+            qemu_flush_queued_packets(qemu_get_queue(s->nic));
+        }
+        break;
+
+    case FTGMAC100_APTC: /* Automatic polling */
+        s->aptcr = value;
+
+        if (FTGMAC100_APTC_RXPOLL_CNT(s->aptcr)) {
+            ftgmac100_rxpoll(s);
+        }
+
+        if (FTGMAC100_APTC_TXPOLL_CNT(s->aptcr)) {
+            qemu_log_mask(LOG_UNIMP, "%s: no transmit polling\n", __func__);
+        }
+        break;
+
+    case FTGMAC100_MACCR: /* MAC Device control */
+        s->maccr = value;
+        if (value & FTGMAC100_MACCR_SW_RST) {
+            ftgmac100_reset(DEVICE(s));
+        }
+
+        if (ftgmac100_can_receive(qemu_get_queue(s->nic))) {
+            qemu_flush_queued_packets(qemu_get_queue(s->nic));
+        }
+        break;
+
+    case FTGMAC100_PHYCR:  /* PHY Device control */
+        reg = FTGMAC100_PHYCR_REG(value);
+        s->phycr = value;
+        if (value & FTGMAC100_PHYCR_MIIWR) {
+            do_phy_write(s, reg, s->phydata & 0xffff);
+            s->phycr &= ~FTGMAC100_PHYCR_MIIWR;
+        } else {
+            s->phydata = do_phy_read(s, reg) << 16;
+            s->phycr &= ~FTGMAC100_PHYCR_MIIRD;
+        }
+        break;
+    case FTGMAC100_PHYDATA:
+        s->phydata = value & 0xffff;
+        break;
+    case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
+        s->dblac = value;
+        break;
+    case FTGMAC100_REVR:  /* Feature Register */
+        /* TODO: Only Old MDIO interface is supported */
+        s->revr = value & ~FTGMAC100_REVR_NEW_MDIO_INTERFACE;
+        break;
+    case FTGMAC100_FEAR1: /* Feature Register 1 */
+        s->fear1 = value;
+        break;
+    case FTGMAC100_TPAFCR: /* Transmit Priority Arbitration and FIFO Control */
+        s->tpafcr = value;
+        break;
+    case FTGMAC100_FCR: /* Flow Control  */
+        s->fcr  = value;
+        break;
+
+    case FTGMAC100_HPTXPD: /* High Priority Transmit Poll Demand */
+    case FTGMAC100_HPTXR_BADR: /* High Priority Transmit Ring Base Address */
+    case FTGMAC100_MACSR: /* MAC Status Register (MACSR) */
+        qemu_log_mask(LOG_UNIMP, "%s: write to unimplemented register 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad address at offset 0x%"
+                      HWADDR_PRIx "\n", __func__, addr);
+        break;
+    }
+
+    ftgmac100_update_irq(s);
+}
+
+static int ftgmac100_filter(FTGMAC100State *s, const uint8_t *buf, size_t len)
+{
+    unsigned mcast_idx;
+
+    if (s->maccr & FTGMAC100_MACCR_RX_ALL) {
+        return 1;
+    }
+
+    switch (get_eth_packet_type(PKT_GET_ETH_HDR(buf))) {
+    case ETH_PKT_BCAST:
+        if (!(s->maccr & FTGMAC100_MACCR_RX_BROADPKT)) {
+            return 0;
+        }
+        break;
+    case ETH_PKT_MCAST:
+        if (!(s->maccr & FTGMAC100_MACCR_RX_MULTIPKT)) {
+            if (!(s->maccr & FTGMAC100_MACCR_HT_MULTI_EN)) {
+                return 0;
+            }
+
+            /* TODO: this does not seem to work for ftgmac100 */
+            mcast_idx = compute_mcast_idx(buf);
+            if (!(s->math[mcast_idx / 32] & (1 << (mcast_idx % 32)))) {
+                return 0;
+            }
+        }
+        break;
+    case ETH_PKT_UCAST:
+        if (memcmp(s->conf.macaddr.a, buf, 6)) {
+            return 0;
+        }
+        break;
+    }
+
+    return 1;
+}
+
+static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
+                                 size_t len)
+{
+    FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
+    FTGMAC100Desc bd;
+    uint32_t flags = 0;
+    uint32_t addr;
+    uint32_t crc;
+    uint32_t buf_addr;
+    uint8_t *crc_ptr;
+    uint32_t buf_len;
+    size_t size = len;
+    uint32_t first = FTGMAC100_RXDES0_FRS;
+    int max_frame_size = ftgmac100_max_frame_size(s);
+
+    if ((s->maccr & (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN))
+         != (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) {
+        return -1;
+    }
+
+    /* TODO : Pad to minimum Ethernet frame length */
+    /* handle small packets.  */
+    if (size < 10) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped frame of %zd bytes\n",
+                      __func__, size);
+        return size;
+    }
+
+    if (size < 64 && !(s->maccr & FTGMAC100_MACCR_RX_RUNT)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped runt frame of %zd bytes\n",
+                      __func__, size);
+        return size;
+    }
+
+    if (!ftgmac100_filter(s, buf, size)) {
+        return size;
+    }
+
+    /* 4 bytes for the CRC.  */
+    size += 4;
+    crc = cpu_to_be32(crc32(~0, buf, size));
+    crc_ptr = (uint8_t *) &crc;
+
+    /* Huge frames are truncated.  */
+    if (size > max_frame_size) {
+        size = max_frame_size;
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: frame too big : %zd bytes\n",
+                      __func__, size);
+        flags |= FTGMAC100_RXDES0_FTL;
+    }
+
+    switch (get_eth_packet_type(PKT_GET_ETH_HDR(buf))) {
+    case ETH_PKT_BCAST:
+        flags |= FTGMAC100_RXDES0_BROADCAST;
+        break;
+    case ETH_PKT_MCAST:
+        flags |= FTGMAC100_RXDES0_MULTICAST;
+        break;
+    case ETH_PKT_UCAST:
+        break;
+    }
+
+    addr = s->rx_descriptor;
+    while (size > 0) {
+        if (!ftgmac100_can_receive(nc)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Unexpected packet\n", __func__);
+            return -1;
+        }
+
+        if (ftgmac100_read_bd(&bd, addr) ||
+            (bd.des0 & FTGMAC100_RXDES0_RXPKT_RDY)) {
+            /* No descriptors available.  Bail out.  */
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Lost end of frame\n",
+                          __func__);
+            s->isr |= FTGMAC100_INT_NO_RXBUF;
+            break;
+        }
+        buf_len = (size <= s->rbsr) ? size : s->rbsr;
+        bd.des0 |= buf_len & 0x3fff;
+        size -= buf_len;
+
+        /* The last 4 bytes are the CRC.  */
+        if (size < 4) {
+            buf_len += size - 4;
+        }
+        buf_addr = bd.des3;
+        dma_memory_write(&address_space_memory, buf_addr, buf, buf_len);
+        buf += buf_len;
+        if (size < 4) {
+            dma_memory_write(&address_space_memory, buf_addr + buf_len,
+                             crc_ptr, 4 - size);
+            crc_ptr += 4 - size;
+        }
+
+        bd.des0 |= first | FTGMAC100_RXDES0_RXPKT_RDY;
+        first = 0;
+        if (size == 0) {
+            /* Last buffer in frame.  */
+            bd.des0 |= flags | FTGMAC100_RXDES0_LRS;
+            s->isr |= FTGMAC100_INT_RPKT_BUF;
+        } else {
+            s->isr |= FTGMAC100_INT_RPKT_FIFO;
+        }
+        ftgmac100_write_bd(&bd, addr);
+        if (bd.des0 & s->rxdes0_edorr) {
+            addr = s->rx_ring;
+        } else {
+            addr += sizeof(FTGMAC100Desc);
+        }
+    }
+    s->rx_descriptor = addr;
+
+    ftgmac100_update_irq(s);
+    return len;
+}
+
+static const MemoryRegionOps ftgmac100_ops = {
+    .read = ftgmac100_read,
+    .write = ftgmac100_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void ftgmac100_cleanup(NetClientState *nc)
+{
+    FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc));
+
+    s->nic = NULL;
+}
+
+static NetClientInfo net_ftgmac100_info = {
+    .type = NET_CLIENT_DRIVER_NIC,
+    .size = sizeof(NICState),
+    .can_receive = ftgmac100_can_receive,
+    .receive = ftgmac100_receive,
+    .cleanup = ftgmac100_cleanup,
+    .link_status_changed = ftgmac100_set_link,
+};
+
+static void ftgmac100_realize(DeviceState *dev, Error **errp)
+{
+    FTGMAC100State *s = FTGMAC100(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    if (s->aspeed) {
+        s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR_ASPEED;
+        s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR_ASPEED;
+    } else {
+        s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR;
+        s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR;
+    }
+
+    memory_region_init_io(&s->iomem, OBJECT(dev), &ftgmac100_ops, s,
+                          TYPE_FTGMAC100, 0x2000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    sysbus_init_irq(sbd, &s->irq);
+    qemu_macaddr_default_if_unset(&s->conf.macaddr);
+
+    s->conf.peers.ncs[0] = nd_table[0].netdev;
+
+    s->nic = qemu_new_nic(&net_ftgmac100_info, &s->conf,
+                          object_get_typename(OBJECT(dev)), DEVICE(dev)->id,
+                          s);
+    qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
+
+    s->frame = g_malloc(FTGMAC100_MAX_FRAME_SIZE);
+}
+
+static const VMStateDescription vmstate_ftgmac100 = {
+    .name = TYPE_FTGMAC100,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(irq_state, FTGMAC100State),
+        VMSTATE_UINT32(isr, FTGMAC100State),
+        VMSTATE_UINT32(ier, FTGMAC100State),
+        VMSTATE_UINT32(rx_enabled, FTGMAC100State),
+        VMSTATE_UINT32(rx_ring, FTGMAC100State),
+        VMSTATE_UINT32(rbsr, FTGMAC100State),
+        VMSTATE_UINT32(tx_ring, FTGMAC100State),
+        VMSTATE_UINT32(rx_descriptor, FTGMAC100State),
+        VMSTATE_UINT32(tx_descriptor, FTGMAC100State),
+        VMSTATE_UINT32_ARRAY(math, FTGMAC100State, 2),
+        VMSTATE_UINT32(itc, FTGMAC100State),
+        VMSTATE_UINT32(aptcr, FTGMAC100State),
+        VMSTATE_UINT32(dblac, FTGMAC100State),
+        VMSTATE_UINT32(revr, FTGMAC100State),
+        VMSTATE_UINT32(fear1, FTGMAC100State),
+        VMSTATE_UINT32(tpafcr, FTGMAC100State),
+        VMSTATE_UINT32(maccr, FTGMAC100State),
+        VMSTATE_UINT32(phycr, FTGMAC100State),
+        VMSTATE_UINT32(phydata, FTGMAC100State),
+        VMSTATE_UINT32(fcr, FTGMAC100State),
+        VMSTATE_UINT32(phy_status, FTGMAC100State),
+        VMSTATE_UINT32(phy_control, FTGMAC100State),
+        VMSTATE_UINT32(phy_advertise, FTGMAC100State),
+        VMSTATE_UINT32(phy_int, FTGMAC100State),
+        VMSTATE_UINT32(phy_int_mask, FTGMAC100State),
+        VMSTATE_UINT32(txdes0_edotr, FTGMAC100State),
+        VMSTATE_UINT32(rxdes0_edorr, FTGMAC100State),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property ftgmac100_properties[] = {
+    DEFINE_PROP_BOOL("aspeed", FTGMAC100State, aspeed, false),
+    DEFINE_NIC_PROPERTIES(FTGMAC100State, conf),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void ftgmac100_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_ftgmac100;
+    dc->reset = ftgmac100_reset;
+    dc->props = ftgmac100_properties;
+    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+    dc->realize = ftgmac100_realize;
+    dc->desc = "Faraday FTGMAC100 Gigabit Ethernet emulation";
+}
+
+static const TypeInfo ftgmac100_info = {
+    .name = TYPE_FTGMAC100,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(FTGMAC100State),
+    .class_init = ftgmac100_class_init,
+};
+
+static void ftgmac100_register_types(void)
+{
+    type_register_static(&ftgmac100_info);
+}
+
+type_init(ftgmac100_register_types)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index c32168077a..7d091c9259 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -510,6 +510,10 @@ static int peer_attach(VirtIONet *n, int index)
         return 0;
     }
 
+    if (n->max_queues == 1) {
+        return 0;
+    }
+
     return tap_enable(nc->peer);
 }
 
diff --git a/hw/pci-host/versatile.c b/hw/pci-host/versatile.c
index 467cbb9cb8..27fde46126 100644
--- a/hw/pci-host/versatile.c
+++ b/hw/pci-host/versatile.c
@@ -380,20 +380,8 @@ static void pci_vpb_reset(DeviceState *d)
 
 static void pci_vpb_init(Object *obj)
 {
-    PCIHostState *h = PCI_HOST_BRIDGE(obj);
     PCIVPBState *s = PCI_VPB(obj);
 
-    memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
-    memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
-
-    pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), DEVICE(obj), "pci",
-                        &s->pci_mem_space, &s->pci_io_space,
-                        PCI_DEVFN(11, 0), TYPE_PCI_BUS);
-    h->bus = &s->pci_bus;
-
-    object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST);
-    qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus));
-
     /* Window sizes for VersatilePB; realview_pci's init will override */
     s->mem_win_size[0] = 0x0c000000;
     s->mem_win_size[1] = 0x10000000;
@@ -403,10 +391,22 @@ static void pci_vpb_init(Object *obj)
 static void pci_vpb_realize(DeviceState *dev, Error **errp)
 {
     PCIVPBState *s = PCI_VPB(dev);
+    PCIHostState *h = PCI_HOST_BRIDGE(dev);
     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
     pci_map_irq_fn mapfn;
     int i;
 
+    memory_region_init(&s->pci_io_space, OBJECT(s), "pci_io", 1ULL << 32);
+    memory_region_init(&s->pci_mem_space, OBJECT(s), "pci_mem", 1ULL << 32);
+
+    pci_bus_new_inplace(&s->pci_bus, sizeof(s->pci_bus), dev, "pci",
+                        &s->pci_mem_space, &s->pci_io_space,
+                        PCI_DEVFN(11, 0), TYPE_PCI_BUS);
+    h->bus = &s->pci_bus;
+
+    object_initialize(&s->pci_dev, sizeof(s->pci_dev), TYPE_VERSATILE_PCI_HOST);
+    qdev_set_parent_bus(DEVICE(&s->pci_dev), BUS(&s->pci_bus));
+
     for (i = 0; i < 4; i++) {
         sysbus_init_irq(sbd, &s->irq[i]);
     }
@@ -503,8 +503,6 @@ static void pci_vpb_class_init(ObjectClass *klass, void *data)
     dc->reset = pci_vpb_reset;
     dc->vmsd = &pci_vpb_vmstate;
     dc->props = pci_vpb_properties;
-    /* Reason: object_unref() hangs */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 static const TypeInfo pci_vpb_info = {
@@ -526,19 +524,10 @@ static void pci_realview_init(Object *obj)
     s->mem_win_size[2] = 0x08000000;
 }
 
-static void pci_realview_class_init(ObjectClass *class, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(class);
-
-    /* Reason: object_unref() hangs */
-    dc->cannot_destroy_with_object_finalize_yet = true;
-}
-
 static const TypeInfo pci_realview_info = {
     .name          = "realview_pci",
     .parent        = TYPE_VERSATILE_PCI,
     .instance_init = pci_realview_init,
-    .class_init    = pci_realview_class_init,
 };
 
 static void versatile_pci_register_types(void)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e6b08e1988..259483b1c0 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -869,6 +869,10 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
+    if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
+        memory_region_del_subregion(&pci_dev->bus_master_container_region,
+                                    &pci_dev->bus_master_enable_region);
+    }
     address_space_destroy(&pci_dev->bus_master_as);
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6ee566d658..35db949dbc 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1524,16 +1524,16 @@ static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
         /* Consume invalid HPTEs */
         while ((index < htabslots)
                && !HPTE_VALID(HPTE(spapr->htab, index))) {
-            index++;
             CLEAN_HPTE(HPTE(spapr->htab, index));
+            index++;
         }
 
         /* Consume valid HPTEs */
         chunkstart = index;
         while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
                && HPTE_VALID(HPTE(spapr->htab, index))) {
-            index++;
             CLEAN_HPTE(HPTE(spapr->htab, index));
+            index++;
         }
 
         if (index > chunkstart) {
@@ -2790,6 +2790,12 @@ static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         goto out;
     }
 
+    if (cc->nr_threads != smp_threads) {
+        error_setg(errp, "invalid nr-threads %d, must be %d",
+                   cc->nr_threads, smp_threads);
+        return;
+    }
+
     core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
     if (!core_slot) {
         error_setg(&local_err, "core id %d out of range", cc->core_id);
@@ -3096,6 +3102,11 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     xic->ics_resend = spapr_ics_resend;
     xic->icp_get = spapr_icp_get;
     ispc->print_info = spapr_pic_print_info;
+    /* Force NUMA node memory size to be a multiple of
+     * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
+     * in which LMBs are represented and hot-added
+     */
+    mc->numa_mem_align_shift = 28;
 }
 
 static const TypeInfo spapr_machine_info = {
@@ -3180,6 +3191,7 @@ static void spapr_machine_2_8_class_options(MachineClass *mc)
 {
     spapr_machine_2_9_class_options(mc);
     SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_8);
+    mc->numa_mem_align_shift = 23;
 }
 
 DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 150f6bf2c7..a1cdc875b1 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -135,6 +135,17 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc,
         if (!drc->dev) {
             return RTAS_OUT_NO_SUCH_INDICATOR;
         }
+        if (drc->awaiting_release && drc->awaiting_allocation) {
+            /* kernel is acknowledging a previous hotplug event
+             * while we are already removing it.
+             * it's safe to ignore awaiting_allocation here since we know the
+             * situation is predicated on the guest either already having done
+             * so (boot-time hotplug), or never being able to acquire in the
+             * first place (hotplug followed by immediate unplug).
+             */
+            drc->awaiting_allocation_skippable = true;
+            return RTAS_OUT_NO_SUCH_INDICATOR;
+        }
     }
 
     if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) {
@@ -436,9 +447,11 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d,
     }
 
     if (drc->awaiting_allocation) {
-        drc->awaiting_release = true;
-        trace_spapr_drc_awaiting_allocation(get_index(drc));
-        return;
+        if (!drc->awaiting_allocation_skippable) {
+            drc->awaiting_release = true;
+            trace_spapr_drc_awaiting_allocation(get_index(drc));
+            return;
+        }
     }
 
     drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE;
@@ -448,6 +461,7 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d,
     }
 
     drc->awaiting_release = false;
+    drc->awaiting_allocation_skippable = false;
     g_free(drc->fdt);
     drc->fdt = NULL;
     drc->fdt_start_offset = 0;
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index 28ea20440e..fb8d640a7e 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -11,11 +11,51 @@
 #include "qemu/osdep.h"
 #include "ccw-device.h"
 
+static void ccw_device_refill_ids(CcwDevice *dev)
+{
+    SubchDev *sch = dev->sch;
+
+    assert(sch);
+
+    dev->dev_id.cssid = sch->cssid;
+    dev->dev_id.ssid = sch->ssid;
+    dev->dev_id.devid = sch->devno;
+    dev->dev_id.valid = true;
+
+    dev->subch_id.cssid = sch->cssid;
+    dev->subch_id.ssid = sch->ssid;
+    dev->subch_id.devid = sch->schid;
+    dev->subch_id.valid = true;
+}
+
+static void ccw_device_realize(CcwDevice *dev, Error **errp)
+{
+    ccw_device_refill_ids(dev);
+}
+
+static Property ccw_device_properties[] = {
+    DEFINE_PROP_CSS_DEV_ID("devno", CcwDevice, devno),
+    DEFINE_PROP_CSS_DEV_ID_RO("dev_id", CcwDevice, dev_id),
+    DEFINE_PROP_CSS_DEV_ID_RO("subch_id", CcwDevice, subch_id),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void ccw_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    CCWDeviceClass *k = CCW_DEVICE_CLASS(klass);
+
+    k->realize = ccw_device_realize;
+    k->refill_ids = ccw_device_refill_ids;
+    dc->props = ccw_device_properties;
+}
+
 static const TypeInfo ccw_device_info = {
     .name = TYPE_CCW_DEVICE,
     .parent = TYPE_DEVICE,
     .instance_size = sizeof(CcwDevice),
     .class_size = sizeof(CCWDeviceClass),
+    .class_init = ccw_device_class_init,
     .abstract = true,
 };
 
diff --git a/hw/s390x/ccw-device.h b/hw/s390x/ccw-device.h
index 59ba01b6c5..89c8e5dff7 100644
--- a/hw/s390x/ccw-device.h
+++ b/hw/s390x/ccw-device.h
@@ -19,12 +19,19 @@ typedef struct CcwDevice {
     DeviceState parent_obj;
     SubchDev *sch;
     /* <cssid>.<ssid>.<device number> */
-    CssDevId bus_id;
+    /* The user-set busid of the virtual ccw device. */
+    CssDevId devno;
+    /* The actual busid of the virtual ccw device. */
+    CssDevId dev_id;
+    /* The actual busid of the virtual subchannel. */
+    CssDevId subch_id;
 } CcwDevice;
 
 typedef struct CCWDeviceClass {
     DeviceClass parent_class;
     void (*unplug)(HotplugHandler *, DeviceState *, Error **);
+    void (*realize)(CcwDevice *, Error **);
+    void (*refill_ids)(CcwDevice *);
 } CCWDeviceClass;
 
 static inline CcwDevice *to_ccw_dev_fast(DeviceState *d)
diff --git a/hw/s390x/css-bridge.c b/hw/s390x/css-bridge.c
index 9a7f7ee60c..b54ac01d37 100644
--- a/hw/s390x/css-bridge.c
+++ b/hw/s390x/css-bridge.c
@@ -107,6 +107,9 @@ VirtualCssBus *virtual_css_bus_init(void)
     /* Enable hotplugging */
     qbus_set_hotplug_handler(bus, dev, &error_abort);
 
+    css_register_io_adapters(CSS_IO_ADAPTER_VIRTIO, true, false,
+                             &error_abort);
+
     return cbus;
  }
 
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index 37caa98195..c03bb20bc9 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -47,7 +47,6 @@ typedef struct IoAdapter {
     uint32_t id;
     uint8_t type;
     uint8_t isc;
-    QTAILQ_ENTRY(IoAdapter) sibling;
 } IoAdapter;
 
 typedef struct ChannelSubSys {
@@ -61,7 +60,7 @@ typedef struct ChannelSubSys {
     uint64_t chnmon_area;
     CssImage *css[MAX_CSSID + 1];
     uint8_t default_cssid;
-    QTAILQ_HEAD(, IoAdapter) io_adapters;
+    IoAdapter *io_adapters[CSS_IO_ADAPTER_TYPE_NUMS][MAX_ISC + 1];
     QTAILQ_HEAD(, IndAddr) indicator_addresses;
 } ChannelSubSys;
 
@@ -72,7 +71,6 @@ static ChannelSubSys channel_subsys = {
     .do_crw_mchk = true,
     .crws_lost = false,
     .chnmon_active = false,
-    .io_adapters = QTAILQ_HEAD_INITIALIZER(channel_subsys.io_adapters),
     .indicator_addresses =
         QTAILQ_HEAD_INITIALIZER(channel_subsys.indicator_addresses),
 };
@@ -155,44 +153,67 @@ int css_create_css_image(uint8_t cssid, bool default_image)
     return 0;
 }
 
-int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap,
-                            bool maskable, uint32_t *id)
+uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc)
 {
+    if (type >= CSS_IO_ADAPTER_TYPE_NUMS || isc > MAX_ISC ||
+        !channel_subsys.io_adapters[type][isc]) {
+        return -1;
+    }
+
+    return channel_subsys.io_adapters[type][isc]->id;
+}
+
+/**
+ * css_register_io_adapters: Register I/O adapters per ISC during init
+ *
+ * @swap: an indication if byte swap is needed.
+ * @maskable: an indication if the adapter is subject to the mask operation.
+ * @errp: location to store error information.
+ */
+void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable,
+                              Error **errp)
+{
+    uint32_t id;
+    int ret, isc;
     IoAdapter *adapter;
-    bool found = false;
-    int ret;
     S390FLICState *fs = s390_get_flic();
     S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
 
-    *id = 0;
-    QTAILQ_FOREACH(adapter, &channel_subsys.io_adapters, sibling) {
-        if ((adapter->type == type) && (adapter->isc == isc)) {
-            *id = adapter->id;
-            found = true;
-            ret = 0;
+    /*
+     * Disallow multiple registrations for the same device type.
+     * Report an error if registering for an already registered type.
+     */
+    if (channel_subsys.io_adapters[type][0]) {
+        error_setg(errp, "Adapters for type %d already registered", type);
+    }
+
+    for (isc = 0; isc <= MAX_ISC; isc++) {
+        id = (type << 3) | isc;
+        ret = fsc->register_io_adapter(fs, id, isc, swap, maskable);
+        if (ret == 0) {
+            adapter = g_new0(IoAdapter, 1);
+            adapter->id = id;
+            adapter->isc = isc;
+            adapter->type = type;
+            channel_subsys.io_adapters[type][isc] = adapter;
+        } else {
+            error_setg_errno(errp, -ret, "Unexpected error %d when "
+                             "registering adapter %d", ret, id);
             break;
         }
-        if (adapter->id >= *id) {
-            *id = adapter->id + 1;
-        }
-    }
-    if (found) {
-        goto out;
     }
-    adapter = g_new0(IoAdapter, 1);
-    ret = fsc->register_io_adapter(fs, *id, isc, swap, maskable);
-    if (ret == 0) {
-        adapter->id = *id;
-        adapter->isc = isc;
-        adapter->type = type;
-        QTAILQ_INSERT_TAIL(&channel_subsys.io_adapters, adapter, sibling);
-    } else {
-        g_free(adapter);
-        fprintf(stderr, "Unexpected error %d when registering adapter %d\n",
-                ret, *id);
+
+    /*
+     * No need to free registered adapters in kvm: kvm will clean up
+     * when the machine goes away.
+     */
+    if (ret) {
+        for (isc--; isc >= 0; isc--) {
+            g_free(channel_subsys.io_adapters[type][isc]);
+            channel_subsys.io_adapters[type][isc] = NULL;
+        }
     }
-out:
-    return ret;
+
 }
 
 static void css_clear_io_interrupt(uint16_t subchannel_id,
@@ -1894,6 +1915,13 @@ PropertyInfo css_devid_propinfo = {
     .set = set_css_devid,
 };
 
+PropertyInfo css_devid_ro_propinfo = {
+    .name = "str",
+    .description = "Read-only identifier of an I/O device in the channel "
+                   "subsystem, example: fe.1.23ab",
+    .get = get_css_devid,
+};
+
 SubchDev *css_create_virtual_sch(CssDevId bus_id, Error **errp)
 {
     uint16_t schid = 0;
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 69b0291e8a..a8a1bab50a 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -23,15 +23,17 @@
 #include "hw/pci/msi.h"
 #include "qemu/error-report.h"
 
-/* #define DEBUG_S390PCI_BUS */
-#ifdef DEBUG_S390PCI_BUS
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
+#ifndef DEBUG_S390PCI_BUS
+#define DEBUG_S390PCI_BUS  0
 #endif
 
+#define DPRINTF(fmt, ...)                                         \
+    do {                                                          \
+        if (DEBUG_S390PCI_BUS) {                                  \
+            fprintf(stderr, "S390pci-bus: " fmt, ## __VA_ARGS__); \
+        }                                                         \
+    } while (0)
+
 S390pciState *s390_get_phb(void)
 {
     static S390pciState *phb;
@@ -579,6 +581,9 @@ static int s390_pcihost_init(SysBusDevice *dev)
     s->bus_no = 0;
     QTAILQ_INIT(&s->pending_sei);
     QTAILQ_INIT(&s->zpci_devs);
+
+    css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false, &error_abort);
+
     return 0;
 }
 
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index dcbf4820c9..cf142a3e68 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -30,7 +30,6 @@
 #define FH_MASK_INDEX    0x0000ffff
 #define FH_SHM_VFIO      0x00010000
 #define FH_SHM_EMUL      0x00020000
-#define S390_PCIPT_ADAPTER 2
 #define ZPCI_MAX_FID 0xffffffff
 #define ZPCI_MAX_UID 0xffff
 #define UID_UNDEFINED 0
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index d2a8c0a083..314a9cbad4 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -20,15 +20,17 @@
 #include "qemu/error-report.h"
 #include "sysemu/hw_accel.h"
 
-/* #define DEBUG_S390PCI_INST */
-#ifdef DEBUG_S390PCI_INST
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
+#ifndef DEBUG_S390PCI_INST
+#define DEBUG_S390PCI_INST  0
 #endif
 
+#define DPRINTF(fmt, ...)                                          \
+    do {                                                           \
+        if (DEBUG_S390PCI_INST) {                                  \
+            fprintf(stderr, "s390pci-inst: " fmt, ## __VA_ARGS__); \
+        }                                                          \
+    } while (0)
+
 static void s390_set_status_code(CPUS390XState *env,
                                  uint8_t r, uint64_t status_code)
 {
@@ -731,12 +733,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
 static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib)
 {
     int ret, len;
+    uint8_t isc = FIB_DATA_ISC(ldl_p(&fib.data));
 
-    ret = css_register_io_adapter(S390_PCIPT_ADAPTER,
-                                  FIB_DATA_ISC(ldl_p(&fib.data)), true, false,
-                                  &pbdev->routes.adapter.adapter_id);
-    assert(ret == 0);
-
+    pbdev->routes.adapter.adapter_id = css_get_adapter_id(
+                                       CSS_IO_ADAPTER_PCI, isc);
     pbdev->summary_ind = get_indicator(ldq_p(&fib.aisb), sizeof(uint64_t));
     len = BITS_TO_LONGS(FIB_DATA_NOI(ldl_p(&fib.data))) * sizeof(unsigned long);
     pbdev->indicator = get_indicator(ldq_p(&fib.aibv), len);
@@ -755,7 +755,7 @@ static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib)
     pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data));
     pbdev->routes.adapter.ind_addr = ldq_p(&fib.aibv);
     pbdev->routes.adapter.ind_offset = FIB_DATA_AIBVO(ldl_p(&fib.data));
-    pbdev->isc = FIB_DATA_ISC(ldl_p(&fib.data));
+    pbdev->isc = isc;
     pbdev->noi = FIB_DATA_NOI(ldl_p(&fib.data));
     pbdev->sum = FIB_DATA_SUM(ldl_p(&fib.data));
 
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 40914fde6f..04bd0ebe40 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -113,12 +113,13 @@ static void ccw_init(MachineState *machine)
     s390_sclp_init();
     s390_memory_init(machine->ram_size);
 
+    s390_flic_init();
+
     /* get a BUS */
     css_bus = virtual_css_bus_init();
     s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline,
                       machine->initrd_filename, "s390-ccw.img",
                       "s390-netboot.img", true);
-    s390_flic_init();
 
     dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
     object_property_add_child(qdev_get_machine(), TYPE_S390_PCI_HOST_BRIDGE,
@@ -336,6 +337,9 @@ static const TypeInfo ccw_machine_info = {
     }                                                                         \
     type_init(ccw_machine_register_##suffix)
 
+#define CCW_COMPAT_2_9 \
+        HW_COMPAT_2_9
+
 #define CCW_COMPAT_2_8 \
         HW_COMPAT_2_8 \
         {\
@@ -402,14 +406,26 @@ static const TypeInfo ccw_machine_info = {
             .value    = "0",\
         },
 
+static void ccw_machine_2_10_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_2_10_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(2_10, "2.10", true);
+
 static void ccw_machine_2_9_instance_options(MachineState *machine)
 {
+    ccw_machine_2_10_instance_options(machine);
 }
 
 static void ccw_machine_2_9_class_options(MachineClass *mc)
 {
+    ccw_machine_2_10_class_options(mc);
+    SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_9);
 }
-DEFINE_CCW_MACHINE(2_9, "2.9", true);
+DEFINE_CCW_MACHINE(2_9, "2.9", false);
 
 static void ccw_machine_2_8_instance_options(MachineState *machine)
 {
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 00b3bde4e9..e7167e3d05 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -616,10 +616,9 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
                 dev->routes.adapter.ind_offset = ind_bit;
                 dev->routes.adapter.summary_offset = 7;
                 cpu_physical_memory_unmap(thinint, hw_len, 0, hw_len);
-                ret = css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO,
-                                              dev->thinint_isc, true, false,
-                                              &dev->routes.adapter.adapter_id);
-                assert(ret == 0);
+                dev->routes.adapter.adapter_id = css_get_adapter_id(
+                                                 CSS_IO_ADAPTER_VIRTIO,
+                                                 dev->thinint_isc);
                 sch->thinint_active = ((dev->indicators != NULL) &&
                                        (dev->summary_indicator != NULL));
                 sch->curr_status.scsw.count = ccw.count - len;
@@ -680,7 +679,8 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
 {
     VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
     CcwDevice *ccw_dev = CCW_DEVICE(dev);
-    SubchDev *sch = css_create_virtual_sch(ccw_dev->bus_id, errp);
+    CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev);
+    SubchDev *sch = css_create_virtual_sch(ccw_dev->devno, errp);
     Error *err = NULL;
 
     if (!sch) {
@@ -689,8 +689,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
     if (!virtio_ccw_rev_max(dev) && dev->force_revision_1) {
         error_setg(&err, "Invalid value of property max_rev "
                    "(is %d expected >= 1)", virtio_ccw_rev_max(dev));
-        error_propagate(errp, err);
-        return;
+        goto out_err;
     }
 
     sch->driver_data = dev;
@@ -705,7 +704,7 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
 
     trace_virtio_ccw_new_device(
         sch->cssid, sch->ssid, sch->schid, sch->devno,
-        ccw_dev->bus_id.valid ? "user-configured" : "auto-configured");
+        ccw_dev->devno.valid ? "user-configured" : "auto-configured");
 
     if (!kvm_eventfds_enabled()) {
         dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD;
@@ -713,13 +712,23 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp)
 
     if (k->realize) {
         k->realize(dev, &err);
+        if (err) {
+            goto out_err;
+        }
     }
+
+    ck->realize(ccw_dev, &err);
     if (err) {
-        error_propagate(errp, err);
-        css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
-        ccw_dev->sch = NULL;
-        g_free(sch);
+        goto out_err;
     }
+
+    return;
+
+out_err:
+    error_propagate(errp, err);
+    css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
+    ccw_dev->sch = NULL;
+    g_free(sch);
 }
 
 static int virtio_ccw_exit(VirtioCcwDevice *dev)
@@ -1261,12 +1270,17 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f)
 {
     VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
     CcwDevice *ccw_dev = CCW_DEVICE(d);
+    CCWDeviceClass *ck = CCW_DEVICE_GET_CLASS(ccw_dev);
     SubchDev *s = ccw_dev->sch;
     VirtIODevice *vdev = virtio_ccw_get_vdev(s);
     int len;
 
     s->driver_data = dev;
     subch_device_load(s, f);
+    /* Re-fill subch_id after loading the subchannel states.*/
+    if (ck->refill_ids) {
+        ck->refill_ids(ccw_dev);
+    }
     len = qemu_get_be32(f);
     if (len != 0) {
         dev->indicators = get_indicator(qemu_get_be64(f), len);
@@ -1293,9 +1307,9 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f)
     dev->thinint_isc = qemu_get_byte(f);
     dev->revision = qemu_get_be32(f);
     if (s->thinint_active) {
-        return css_register_io_adapter(CSS_IO_ADAPTER_VIRTIO,
-                                       dev->thinint_isc, true, false,
-                                       &dev->routes.adapter.adapter_id);
+        dev->routes.adapter.adapter_id = css_get_adapter_id(
+                                         CSS_IO_ADAPTER_VIRTIO,
+                                         dev->thinint_isc);
     }
 
     return 0;
@@ -1354,7 +1368,6 @@ static void virtio_ccw_device_unplugged(DeviceState *d)
 /**************** Virtio-ccw Bus Device Descriptions *******************/
 
 static Property virtio_ccw_net_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1383,7 +1396,6 @@ static const TypeInfo virtio_ccw_net = {
 };
 
 static Property virtio_ccw_blk_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1412,7 +1424,6 @@ static const TypeInfo virtio_ccw_blk = {
 };
 
 static Property virtio_ccw_serial_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1441,7 +1452,6 @@ static const TypeInfo virtio_ccw_serial = {
 };
 
 static Property virtio_ccw_balloon_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1470,7 +1480,6 @@ static const TypeInfo virtio_ccw_balloon = {
 };
 
 static Property virtio_ccw_scsi_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1500,7 +1509,6 @@ static const TypeInfo virtio_ccw_scsi = {
 
 #ifdef CONFIG_VHOST_SCSI
 static Property vhost_ccw_scsi_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
                        VIRTIO_CCW_MAX_REV),
     DEFINE_PROP_END_OF_LIST(),
@@ -1538,7 +1546,6 @@ static void virtio_ccw_rng_instance_init(Object *obj)
 }
 
 static Property virtio_ccw_rng_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1567,7 +1574,6 @@ static const TypeInfo virtio_ccw_rng = {
 };
 
 static Property virtio_ccw_crypto_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
                     VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1694,7 +1700,6 @@ static const TypeInfo virtio_ccw_bus_info = {
 
 #ifdef CONFIG_VIRTFS
 static Property virtio_ccw_9p_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags,
             VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
@@ -1743,7 +1748,6 @@ static const TypeInfo virtio_ccw_9p_info = {
 #ifdef CONFIG_VHOST_VSOCK
 
 static Property vhost_vsock_ccw_properties[] = {
-    DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id),
     DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev,
                        VIRTIO_CCW_MAX_REV),
     DEFINE_PROP_END_OF_LIST(),
@@ -1757,9 +1761,7 @@ static void vhost_vsock_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp)
 
     qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus));
     object_property_set_bool(OBJECT(vdev), true, "realized", &err);
-    if (err) {
-        error_propagate(errp, err);
-    }
+    error_propagate(errp, err);
 }
 
 static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data)
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index e3d59b7c83..84b8caf901 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -291,7 +291,7 @@ static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl)
     if (cmd->iov_size > iov_size) {
         trace_megasas_iovec_overflow(cmd->index, iov_size, cmd->iov_size);
     } else if (cmd->iov_size < iov_size) {
-        trace_megasas_iovec_underflow(cmd->iov_size, iov_size, cmd->iov_size);
+        trace_megasas_iovec_underflow(cmd->index, iov_size, cmd->iov_size);
     }
     cmd->iov_offset = 0;
     return 0;
@@ -1924,8 +1924,8 @@ static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd)
         abort_ctx &= (uint64_t)0xFFFFFFFF;
     }
     if (abort_cmd->context != abort_ctx) {
-        trace_megasas_abort_invalid_context(cmd->index, abort_cmd->index,
-                                            abort_cmd->context);
+        trace_megasas_abort_invalid_context(cmd->index, abort_cmd->context,
+                                            abort_cmd->index);
         s->event_count++;
         return MFI_STAT_ABORT_NOT_POSSIBLE;
     }
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 2933119e7d..a55ff87c22 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -237,9 +237,8 @@ static void scsi_read_complete(void * opaque, int ret)
         assert(max_transfer);
         stl_be_p(&r->buf[8], max_transfer);
         /* Also take care of the opt xfer len. */
-        if (ldl_be_p(&r->buf[12]) > max_transfer) {
-            stl_be_p(&r->buf[12], max_transfer);
-        }
+        stl_be_p(&r->buf[12],
+                 MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12])));
     }
     scsi_req_data(&r->req, len);
     scsi_req_unref(&r->req);
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index c491ece1f2..f53bc179da 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -233,9 +233,11 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
         }
     }
 
-    virtio_scsi_common_realize(dev, &err, vhost_dummy_handle_output,
+    virtio_scsi_common_realize(dev,
                                vhost_dummy_handle_output,
-                               vhost_dummy_handle_output);
+                               vhost_dummy_handle_output,
+                               vhost_dummy_handle_output,
+                               &err);
     if (err != NULL) {
         error_propagate(errp, err);
         goto close_fd;
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 74c95e0e60..944ea4eb53 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -52,28 +52,40 @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
 static bool virtio_scsi_data_plane_handle_cmd(VirtIODevice *vdev,
                                               VirtQueue *vq)
 {
-    VirtIOSCSI *s = (VirtIOSCSI *)vdev;
+    bool progress;
+    VirtIOSCSI *s = VIRTIO_SCSI(vdev);
 
+    virtio_scsi_acquire(s);
     assert(s->ctx && s->dataplane_started);
-    return virtio_scsi_handle_cmd_vq(s, vq);
+    progress = virtio_scsi_handle_cmd_vq(s, vq);
+    virtio_scsi_release(s);
+    return progress;
 }
 
 static bool virtio_scsi_data_plane_handle_ctrl(VirtIODevice *vdev,
                                                VirtQueue *vq)
 {
+    bool progress;
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
 
+    virtio_scsi_acquire(s);
     assert(s->ctx && s->dataplane_started);
-    return virtio_scsi_handle_ctrl_vq(s, vq);
+    progress = virtio_scsi_handle_ctrl_vq(s, vq);
+    virtio_scsi_release(s);
+    return progress;
 }
 
 static bool virtio_scsi_data_plane_handle_event(VirtIODevice *vdev,
                                                 VirtQueue *vq)
 {
+    bool progress;
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
 
+    virtio_scsi_acquire(s);
     assert(s->ctx && s->dataplane_started);
-    return virtio_scsi_handle_event_vq(s, vq);
+    progress = virtio_scsi_handle_event_vq(s, vq);
+    virtio_scsi_release(s);
+    return progress;
 }
 
 static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 1dbc4bced9..46a3e3f280 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -422,31 +422,15 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
     }
 }
 
-static inline void virtio_scsi_acquire(VirtIOSCSI *s)
-{
-    if (s->ctx) {
-        aio_context_acquire(s->ctx);
-    }
-}
-
-static inline void virtio_scsi_release(VirtIOSCSI *s)
-{
-    if (s->ctx) {
-        aio_context_release(s->ctx);
-    }
-}
-
 bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     VirtIOSCSIReq *req;
     bool progress = false;
 
-    virtio_scsi_acquire(s);
     while ((req = virtio_scsi_pop_req(s, vq))) {
         progress = true;
         virtio_scsi_handle_ctrl_req(s, req);
     }
-    virtio_scsi_release(s);
     return progress;
 }
 
@@ -460,7 +444,9 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
             return;
         }
     }
+    virtio_scsi_acquire(s);
     virtio_scsi_handle_ctrl_vq(s, vq);
+    virtio_scsi_release(s);
 }
 
 static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
@@ -604,7 +590,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
 
     QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
 
-    virtio_scsi_acquire(s);
     do {
         virtio_queue_set_notification(vq, 0);
 
@@ -632,7 +617,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
     QTAILQ_FOREACH_SAFE(req, &reqs, next, next) {
         virtio_scsi_handle_cmd_req_submit(s, req);
     }
-    virtio_scsi_release(s);
     return progress;
 }
 
@@ -647,7 +631,9 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
             return;
         }
     }
+    virtio_scsi_acquire(s);
     virtio_scsi_handle_cmd_vq(s, vq);
+    virtio_scsi_release(s);
 }
 
 static void virtio_scsi_get_config(VirtIODevice *vdev,
@@ -723,12 +709,10 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
         return;
     }
 
-    virtio_scsi_acquire(s);
-
     req = virtio_scsi_pop_req(s, vs->event_vq);
     if (!req) {
         s->events_dropped = true;
-        goto out;
+        return;
     }
 
     if (s->events_dropped) {
@@ -738,7 +722,7 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
 
     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
         virtio_scsi_bad_req(req);
-        goto out;
+        return;
     }
 
     evt = &req->resp.event;
@@ -758,19 +742,14 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev,
         evt->lun[3] = dev->lun & 0xFF;
     }
     virtio_scsi_complete_req(req);
-out:
-    virtio_scsi_release(s);
 }
 
 bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
-    virtio_scsi_acquire(s);
     if (s->events_dropped) {
         virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
-        virtio_scsi_release(s);
         return true;
     }
-    virtio_scsi_release(s);
     return false;
 }
 
@@ -784,7 +763,9 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
             return;
         }
     }
+    virtio_scsi_acquire(s);
     virtio_scsi_handle_event_vq(s, vq);
+    virtio_scsi_release(s);
 }
 
 static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
@@ -794,8 +775,10 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_CHANGE) &&
         dev->type != TYPE_ROM) {
+        virtio_scsi_acquire(s);
         virtio_scsi_push_event(s, dev, VIRTIO_SCSI_T_PARAM_CHANGE,
                                sense.asc | (sense.ascq << 8));
+        virtio_scsi_release(s);
     }
 }
 
@@ -817,9 +800,11 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        virtio_scsi_acquire(s);
         virtio_scsi_push_event(s, sd,
                                VIRTIO_SCSI_T_TRANSPORT_RESET,
                                VIRTIO_SCSI_EVT_RESET_RESCAN);
+        virtio_scsi_release(s);
     }
 }
 
@@ -831,9 +816,11 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
     SCSIDevice *sd = SCSI_DEVICE(dev);
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
+        virtio_scsi_acquire(s);
         virtio_scsi_push_event(s, sd,
                                VIRTIO_SCSI_T_TRANSPORT_RESET,
                                VIRTIO_SCSI_EVT_RESET_REMOVED);
+        virtio_scsi_release(s);
     }
 
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
@@ -854,10 +841,11 @@ static struct SCSIBusInfo virtio_scsi_scsi_info = {
     .load_request = virtio_scsi_load_request,
 };
 
-void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
+void virtio_scsi_common_realize(DeviceState *dev,
                                 VirtIOHandleOutput ctrl,
                                 VirtIOHandleOutput evt,
-                                VirtIOHandleOutput cmd)
+                                VirtIOHandleOutput cmd,
+                                Error **errp)
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOSCSICommon *s = VIRTIO_SCSI_COMMON(dev);
@@ -891,9 +879,11 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;
 
-    virtio_scsi_common_realize(dev, &err, virtio_scsi_handle_ctrl,
+    virtio_scsi_common_realize(dev,
+                               virtio_scsi_handle_ctrl,
                                virtio_scsi_handle_event,
-                               virtio_scsi_handle_cmd);
+                               virtio_scsi_handle_cmd,
+                               &err);
     if (err != NULL) {
         error_propagate(errp, err);
         return;
diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c
index 6d06968f8b..8f520cec1c 100644
--- a/hw/sh4/r2d.c
+++ b/hw/sh4/r2d.c
@@ -277,8 +277,15 @@ static void r2d_init(MachineState *machine)
     sysbus_connect_irq(busdev, 2, irq[PCI_INTC]);
     sysbus_connect_irq(busdev, 3, irq[PCI_INTD]);
 
-    sm501_init(address_space_mem, 0x10000000, SM501_VRAM_SIZE,
-               irq[SM501], serial_hds[2]);
+    dev = qdev_create(NULL, "sysbus-sm501");
+    busdev = SYS_BUS_DEVICE(dev);
+    qdev_prop_set_uint32(dev, "vram-size", SM501_VRAM_SIZE);
+    qdev_prop_set_uint32(dev, "base", 0x10000000);
+    qdev_prop_set_ptr(dev, "chr-state", serial_hds[2]);
+    qdev_init_nofail(dev);
+    sysbus_mmio_map(busdev, 0, 0x10000000);
+    sysbus_mmio_map(busdev, 1, 0x13e00000);
+    sysbus_connect_irq(busdev, 0, irq[SM501]);
 
     /* onboard CF (True IDE mode, Master only). */
     dinfo = drive_get(IF_IDE, 0, 0);
diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c
index 873cd7df9a..5f022cc08d 100644
--- a/hw/sparc/sun4m.c
+++ b/hw/sparc/sun4m.c
@@ -491,7 +491,6 @@ static void tcx_init(hwaddr addr, qemu_irq irq, int vram_size, int width,
     qdev_prop_set_uint16(dev, "width", width);
     qdev_prop_set_uint16(dev, "height", height);
     qdev_prop_set_uint16(dev, "depth", depth);
-    qdev_prop_set_uint64(dev, "prom_addr", addr);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
 
@@ -544,7 +543,6 @@ static void cg3_init(hwaddr addr, qemu_irq irq, int vram_size, int width,
     qdev_prop_set_uint16(dev, "width", width);
     qdev_prop_set_uint16(dev, "height", height);
     qdev_prop_set_uint16(dev, "depth", depth);
-    qdev_prop_set_uint64(dev, "prom-addr", addr);
     qdev_init_nofail(dev);
     s = SYS_BUS_DEVICE(dev);
 
diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c
index 0c189348ae..a2ec3920f8 100644
--- a/hw/timer/exynos4210_mct.c
+++ b/hw/timer/exynos4210_mct.c
@@ -53,6 +53,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
@@ -1372,8 +1373,9 @@ break;
     case L0_TCNTO: case L1_TCNTO:
     case L0_ICNTO: case L1_ICNTO:
     case L0_FRCNTO: case L1_FRCNTO:
-        fprintf(stderr, "\n[exynos4210.mct: write to RO register "
-                TARGET_FMT_plx "]\n\n", offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "exynos4210.mct: write to RO register " TARGET_FMT_plx,
+                      offset);
         break;
 
     case L0_INT_CSTAT: case L1_INT_CSTAT:
diff --git a/hw/timer/exynos4210_pwm.c b/hw/timer/exynos4210_pwm.c
index f5765075c7..87f63f057e 100644
--- a/hw/timer/exynos4210_pwm.c
+++ b/hw/timer/exynos4210_pwm.c
@@ -21,6 +21,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "qemu-common.h"
@@ -252,9 +253,9 @@ static uint64_t exynos4210_pwm_read(void *opaque, hwaddr offset,
         break;
 
     default:
-        fprintf(stderr,
-                "[exynos4210.pwm: bad read offset " TARGET_FMT_plx "]\n",
-                offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "exynos4210.pwm: bad read offset " TARGET_FMT_plx,
+                      offset);
         break;
     }
     return value;
@@ -343,9 +344,9 @@ static void exynos4210_pwm_write(void *opaque, hwaddr offset,
         break;
 
     default:
-        fprintf(stderr,
-                "[exynos4210.pwm: bad write offset " TARGET_FMT_plx "]\n",
-                offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "exynos4210.pwm: bad write offset " TARGET_FMT_plx,
+                      offset);
         break;
 
     }
diff --git a/hw/timer/exynos4210_rtc.c b/hw/timer/exynos4210_rtc.c
index 1a648c5d9e..4607833e3e 100644
--- a/hw/timer/exynos4210_rtc.c
+++ b/hw/timer/exynos4210_rtc.c
@@ -26,6 +26,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "hw/sysbus.h"
 #include "qemu/timer.h"
 #include "qemu-common.h"
@@ -370,9 +371,9 @@ static uint64_t exynos4210_rtc_read(void *opaque, hwaddr offset,
         break;
 
     default:
-        fprintf(stderr,
-                "[exynos4210.rtc: bad read offset " TARGET_FMT_plx "]\n",
-                offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "exynos4210.rtc: bad read offset " TARGET_FMT_plx,
+                      offset);
         break;
     }
     return value;
@@ -433,9 +434,9 @@ static void exynos4210_rtc_write(void *opaque, hwaddr offset,
         if (value > TICNT_THRESHOLD) {
             s->reg_ticcnt = value;
         } else {
-            fprintf(stderr,
-                    "[exynos4210.rtc: bad TICNT value %u ]\n",
-                    (uint32_t)value);
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "exynos4210.rtc: bad TICNT value %u",
+                          (uint32_t)value);
         }
         break;
 
@@ -500,9 +501,9 @@ static void exynos4210_rtc_write(void *opaque, hwaddr offset,
         break;
 
     default:
-        fprintf(stderr,
-                "[exynos4210.rtc: bad write offset " TARGET_FMT_plx "]\n",
-                offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "exynos4210.rtc: bad write offset " TARGET_FMT_plx,
+                      offset);
         break;
 
     }
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index 24f1608b4b..5939b273b9 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -762,9 +762,7 @@ static void usb_set_attached(Object *obj, bool value, Error **errp)
 
     if (value) {
         usb_device_attach(dev, &err);
-        if (err) {
-            error_propagate(errp, err);
-        }
+        error_propagate(errp, err);
     } else {
         usb_device_detach(dev);
     }
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index f0af852709..a2d3143bf4 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2063,7 +2063,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
 static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid)
 {
     XHCIState *xhci = epctx->xhci;
-    XHCIStreamContext *stctx;
+    XHCIStreamContext *stctx = NULL;
     XHCITransfer *xfer;
     XHCIRing *ring;
     USBEndpoint *ep = NULL;
@@ -2186,6 +2186,8 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid)
             break;
         }
     }
+    /* update ring dequeue ptr */
+    xhci_set_ep_state(xhci, epctx, stctx, epctx->state);
     epctx->kick_active--;
 
     ep = xhci_epid_to_usbep(epctx);
diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c
index c9876a5b0f..f9c8eafe06 100644
--- a/hw/usb/host-libusb.c
+++ b/hw/usb/host-libusb.c
@@ -159,7 +159,10 @@ static void usb_host_attach_kernel(USBHostDevice *s);
 #define BULK_TIMEOUT         0        /* unlimited */
 #define INTR_TIMEOUT         0        /* unlimited */
 
-#if LIBUSBX_API_VERSION >= 0x01000103
+#ifndef LIBUSB_API_VERSION
+# define LIBUSB_API_VERSION LIBUSBX_API_VERSION
+#endif
+#if LIBUSB_API_VERSION >= 0x01000103
 # define HAVE_STREAMS 1
 #endif
 
@@ -269,7 +272,7 @@ static int usb_host_get_port(libusb_device *dev, char *port, size_t len)
     size_t off;
     int rc, i;
 
-#if LIBUSBX_API_VERSION >= 0x01000102
+#if LIBUSB_API_VERSION >= 0x01000102
     rc = libusb_get_port_numbers(dev, path, 7);
 #else
     rc = libusb_get_port_path(ctx, dev, path, 7);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f3ba9b9007..6b33b9f55d 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -478,8 +478,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
         giommu->iommu_offset = section->offset_within_address_space -
                                section->offset_within_region;
         giommu->container = container;
-        giommu->n.notify = vfio_iommu_map_notify;
-        giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL;
+        llend = int128_add(int128_make64(section->offset_within_region),
+                           section->size);
+        llend = int128_sub(llend, int128_one());
+        iommu_notifier_init(&giommu->n, vfio_iommu_map_notify,
+                            IOMMU_NOTIFIER_ALL,
+                            section->offset_within_region,
+                            int128_get64(llend));
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
         memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
@@ -550,7 +555,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
         VFIOGuestIOMMU *giommu;
 
         QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
-            if (giommu->iommu == section->mr) {
+            if (giommu->iommu == section->mr &&
+                giommu->n.start == section->offset_within_region) {
                 memory_region_unregister_iommu_notifier(giommu->iommu,
                                                         &giommu->n);
                 QLIST_REMOVE(giommu, giommu_next);
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e995e32dee..349085ea12 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -660,7 +660,7 @@ static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
     VFIOConfigWindowQuirk *window;
 
     if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
-        !vdev->vga || nr != 5) {
+        !vdev->vga || nr != 5 || !vdev->bars[5].ioport) {
         return;
     }
 
@@ -1367,45 +1367,14 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     uint16_t cmd_orig, cmd;
     Error *err = NULL;
 
-    /* This must be an Intel VGA device. */
-    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
-        !vfio_is_vga(vdev) || nr != 4) {
-        return;
-    }
-
-    /*
-     * IGD is not a standard, they like to change their specs often.  We
-     * only attempt to support back to SandBridge and we hope that newer
-     * devices maintain compatibility with generation 8.
-     */
-    gen = igd_gen(vdev);
-    if (gen != 6 && gen != 8) {
-        error_report("IGD device %s is unsupported by IGD quirks, "
-                     "try SandyBridge or newer", vdev->vbasedev.name);
-        return;
-    }
-
-    /*
-     * Regardless of running in UPT or legacy mode, the guest graphics
-     * driver may attempt to use stolen memory, however only legacy mode
-     * has BIOS support for reserving stolen memory in the guest VM.
-     * Emulate the GMCH register in all cases and zero out the stolen
-     * memory size here. Legacy mode may request allocation and re-write
-     * this below.
-     */
-    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
-    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
-
-    /* GMCH is read-only, emulated */
-    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
-    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
-    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
-
     /*
-     * This must be at address 00:02.0 for us to even onsider enabling
-     * legacy mode.  The vBIOS has dependencies on the PCI bus address.
+     * This must be an Intel VGA device at address 00:02.0 for us to even
+     * consider enabling legacy mode.  The vBIOS has dependencies on the
+     * PCI bus address.
      */
-    if (&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+        !vfio_is_vga(vdev) || nr != 4 ||
+        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
                                        0, PCI_DEVFN(0x2, 0))) {
         return;
     }
@@ -1425,6 +1394,18 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     }
 
     /*
+     * IGD is not a standard, they like to change their specs often.  We
+     * only attempt to support back to SandBridge and we hope that newer
+     * devices maintain compatibility with generation 8.
+     */
+    gen = igd_gen(vdev);
+    if (gen != 6 && gen != 8) {
+        error_report("IGD device %s is unsupported in legacy mode, "
+                     "try SandyBridge or newer", vdev->vbasedev.name);
+        return;
+    }
+
+    /*
      * Most of what we're doing here is to enable the ROM to run, so if
      * there's no ROM, there's no point in setting up this quirk.
      * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support.
@@ -1479,6 +1460,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
         goto out;
     }
 
+    gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
+
     /*
      * If IGD VGA Disable is clear (expected) and VGA is not already enabled,
      * try to enable it.  Probably shouldn't be using legacy mode without VGA,
@@ -1549,11 +1532,12 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
      * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused,
      * so let's not waste VM memory for it.
      */
+    gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8));
+
     if (vdev->igd_gms) {
         if (vdev->igd_gms <= 0x10) {
             gms_mb = vdev->igd_gms * 32;
             gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8);
-            pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
         } else {
             error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms);
             vdev->igd_gms = 0;
@@ -1573,6 +1557,11 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
     fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size",
                     bdsm_size, sizeof(*bdsm_size));
 
+    /* GMCH is read-only, emulated */
+    pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
+    pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
+    pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
+
     /* BDSM is read-write, emulated.  The BIOS needs to be able to write it */
     pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
     pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 6926eedd3f..1f7a7c1ae1 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -11,8 +11,11 @@ virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
 
 # hw/virtio/virtio-rng.c
 virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready"
+virtio_rng_cpu_is_stopped(void *rng, int size) "rng %p: cpu is stopped, dropping %d bytes"
+virtio_rng_popped(void *rng) "rng %p: elem popped"
 virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed"
 virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left"
+virtio_rng_vm_state_change(void *rng, int running, int state) "rng %p: state change to running %d state %d"
 
 # hw/virtio/virtio-balloon.c
 #
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index febe519bbd..0001e60b77 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -425,10 +425,8 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
 static int vhost_dev_has_iommu(struct vhost_dev *dev)
 {
     VirtIODevice *vdev = dev->vdev;
-    AddressSpace *dma_as = vdev->dma_as;
 
-    return memory_region_is_iommu(dma_as->root) &&
-           virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+    return virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
 }
 
 static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
@@ -720,6 +718,70 @@ static void vhost_region_del(MemoryListener *listener,
     }
 }
 
+static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
+    struct vhost_dev *hdev = iommu->hdev;
+    hwaddr iova = iotlb->iova + iommu->iommu_offset;
+
+    if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, iova,
+                                                       iotlb->addr_mask + 1)) {
+        error_report("Fail to invalidate device iotlb");
+    }
+}
+
+static void vhost_iommu_region_add(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         iommu_listener);
+    struct vhost_iommu *iommu;
+    Int128 end;
+
+    if (!memory_region_is_iommu(section->mr)) {
+        return;
+    }
+
+    iommu = g_malloc0(sizeof(*iommu));
+    end = int128_add(int128_make64(section->offset_within_region),
+                     section->size);
+    end = int128_sub(end, int128_one());
+    iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
+                        IOMMU_NOTIFIER_UNMAP,
+                        section->offset_within_region,
+                        int128_get64(end));
+    iommu->mr = section->mr;
+    iommu->iommu_offset = section->offset_within_address_space -
+                          section->offset_within_region;
+    iommu->hdev = dev;
+    memory_region_register_iommu_notifier(section->mr, &iommu->n);
+    QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
+    /* TODO: can replay help performance here? */
+}
+
+static void vhost_iommu_region_del(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+                                         iommu_listener);
+    struct vhost_iommu *iommu;
+
+    if (!memory_region_is_iommu(section->mr)) {
+        return;
+    }
+
+    QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
+        if (iommu->mr == section->mr &&
+            iommu->n.start == section->offset_within_region) {
+            memory_region_unregister_iommu_notifier(iommu->mr,
+                                                    &iommu->n);
+            QLIST_REMOVE(iommu, iommu_next);
+            g_free(iommu);
+            break;
+        }
+    }
+}
+
 static void vhost_region_nop(MemoryListener *listener,
                              MemoryRegionSection *section)
 {
@@ -1161,17 +1223,6 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
     event_notifier_cleanup(&vq->masked_notifier);
 }
 
-static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
-{
-    struct vhost_dev *hdev = container_of(n, struct vhost_dev, n);
-
-    if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev,
-                                                       iotlb->iova,
-                                                       iotlb->addr_mask + 1)) {
-        error_report("Fail to invalidate device iotlb");
-    }
-}
-
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                    VhostBackendType backend_type, uint32_t busyloop_timeout)
 {
@@ -1244,8 +1295,10 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
         .priority = 10
     };
 
-    hdev->n.notify = vhost_iommu_unmap_notify;
-    hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP;
+    hdev->iommu_listener = (MemoryListener) {
+        .region_add = vhost_iommu_region_add,
+        .region_del = vhost_iommu_region_del,
+    };
 
     if (hdev->migration_blocker == NULL) {
         if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
@@ -1455,8 +1508,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
     }
 
     if (vhost_dev_has_iommu(hdev)) {
-        memory_region_register_iommu_notifier(vdev->dma_as->root,
-                                              &hdev->n);
+        memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
     }
 
     r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
@@ -1538,8 +1590,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
 
     if (vhost_dev_has_iommu(hdev)) {
         hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
-        memory_region_unregister_iommu_notifier(vdev->dma_as->root,
-                                                &hdev->n);
+        memory_listener_unregister(&hdev->iommu_listener);
     }
     vhost_log_put(hdev, true);
     hdev->started = false;
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index a886011e75..3042232daf 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "hw/qdev.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio.h"
@@ -48,20 +49,33 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
     VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
     bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+    Error *local_err = NULL;
 
     DPRINTF("%s: plug device.\n", qbus->name);
 
     if (klass->pre_plugged != NULL) {
-        klass->pre_plugged(qbus->parent, errp);
+        klass->pre_plugged(qbus->parent, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
     }
 
     /* Get the features of the plugged device. */
     assert(vdc->get_features != NULL);
     vdev->host_features = vdc->get_features(vdev, vdev->host_features,
-                                            errp);
+                                            &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
 
     if (klass->device_plugged != NULL) {
-        klass->device_plugged(qbus->parent, errp);
+        klass->device_plugged(qbus->parent, &local_err);
+    }
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
     }
 
     if (klass->get_dma_as != NULL && has_iommu) {
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index 9639f4e89b..a6ee501051 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -53,6 +53,15 @@ static void chr_read(void *opaque, const void *buf, size_t size)
         return;
     }
 
+    /* we can't modify the virtqueue until
+     * our state is fully synced
+     */
+
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        trace_virtio_rng_cpu_is_stopped(vrng, size);
+        return;
+    }
+
     vrng->quota_remaining -= size;
 
     offset = 0;
@@ -61,6 +70,7 @@ static void chr_read(void *opaque, const void *buf, size_t size)
         if (!elem) {
             break;
         }
+        trace_virtio_rng_popped(vrng);
         len = iov_from_buf(elem->in_sg, elem->in_num,
                            0, buf + offset, size - offset);
         offset += len;
@@ -120,17 +130,21 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp)
     return f;
 }
 
-static int virtio_rng_post_load(void *opaque, int version_id)
+static void virtio_rng_vm_state_change(void *opaque, int running,
+                                       RunState state)
 {
     VirtIORNG *vrng = opaque;
 
+    trace_virtio_rng_vm_state_change(vrng, running, state);
+
     /* We may have an element ready but couldn't process it due to a quota
-     * limit.  Make sure to try again after live migration when the quota may
-     * have been reset.
+     * limit or because CPU was stopped.  Make sure to try again when the
+     * CPU restart.
      */
-    virtio_rng_process(vrng);
 
-    return 0;
+    if (running && is_guest_ready(vrng)) {
+        virtio_rng_process(vrng);
+    }
 }
 
 static void check_rate_limit(void *opaque)
@@ -198,6 +212,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
     vrng->rate_limit_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                                check_rate_limit, vrng);
     vrng->activate_timer = true;
+
+    vrng->vmstate = qemu_add_vm_change_state_handler(virtio_rng_vm_state_change,
+                                                     vrng);
 }
 
 static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
@@ -205,6 +222,7 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIORNG *vrng = VIRTIO_RNG(dev);
 
+    qemu_del_vm_change_state_handler(vrng->vmstate);
     timer_del(vrng->rate_limit_timer);
     timer_free(vrng->rate_limit_timer);
     virtio_cleanup(vdev);
@@ -218,7 +236,6 @@ static const VMStateDescription vmstate_virtio_rng = {
         VMSTATE_VIRTIO_DEVICE,
         VMSTATE_END_OF_LIST()
     },
-    .post_load =  virtio_rng_post_load,
 };
 
 static Property virtio_rng_properties[] = {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 82b6060b2a..03592c542a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1528,7 +1528,18 @@ static void virtio_queue_notify_vq(VirtQueue *vq)
 
 void virtio_queue_notify(VirtIODevice *vdev, int n)
 {
-    virtio_queue_notify_vq(&vdev->vq[n]);
+    VirtQueue *vq = &vdev->vq[n];
+
+    if (unlikely(!vq->vring.desc || vdev->broken)) {
+        return;
+    }
+
+    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+    if (vq->handle_aio_output) {
+        event_notifier_set(&vq->host_notifier);
+    } else if (vq->handle_output) {
+        vq->handle_output(vdev, vq);
+    }
 }
 
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
diff --git a/include/block/aio.h b/include/block/aio.h
index 677b6ffc25..406e32305a 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -511,6 +511,15 @@ void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
 void aio_co_wake(struct Coroutine *co);
 
 /**
+ * aio_co_enter:
+ * @ctx: the context to run the coroutine
+ * @co: the coroutine to run
+ *
+ * Enter a coroutine in the specified AioContext.
+ */
+void aio_co_enter(AioContext *ctx, struct Coroutine *co);
+
+/**
  * Return the AioContext whose event loop runs in the current thread.
  *
  * If called from an IOThread this will be the IOThread's AioContext.  If
diff --git a/include/block/block.h b/include/block/block.h
index 5149260827..144df0ddfb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -366,6 +366,8 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
 void bdrv_invalidate_cache_all(Error **errp);
 int bdrv_inactivate_all(void);
 
+void blk_resume_after_migration(Error **errp);
+
 /* Ensure contents are flushed to disk.  */
 int bdrv_flush(BlockDriverState *bs);
 int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
@@ -379,12 +381,13 @@ void bdrv_drain_all(void);
 
 #define BDRV_POLL_WHILE(bs, cond) ({                       \
     bool waited_ = false;                                  \
+    bool busy_ = true;                                     \
     BlockDriverState *bs_ = (bs);                          \
     AioContext *ctx_ = bdrv_get_aio_context(bs_);          \
     if (aio_context_in_iothread(ctx_)) {                   \
-        while ((cond)) {                                   \
-            aio_poll(ctx_, true);                          \
-            waited_ = true;                                \
+        while ((cond) || busy_) {                          \
+            busy_ = aio_poll(ctx_, (cond));                \
+            waited_ |= !!(cond) | busy_;                   \
         }                                                  \
     } else {                                               \
         assert(qemu_get_current_aio_context() ==           \
@@ -396,11 +399,16 @@ void bdrv_drain_all(void);
          */                                                \
         assert(!bs_->wakeup);                              \
         bs_->wakeup = true;                                \
-        while ((cond)) {                                   \
-            aio_context_release(ctx_);                     \
-            aio_poll(qemu_get_aio_context(), true);        \
-            aio_context_acquire(ctx_);                     \
-            waited_ = true;                                \
+        while (busy_) {                                    \
+            if ((cond)) {                                  \
+                waited_ = busy_ = true;                    \
+                aio_context_release(ctx_);                 \
+                aio_poll(qemu_get_aio_context(), true);    \
+                aio_context_acquire(ctx_);                 \
+            } else {                                       \
+                busy_ = aio_poll(ctx_, false);             \
+                waited_ |= busy_;                          \
+            }                                              \
         }                                                  \
         bs_->wakeup = false;                               \
     }                                                      \
@@ -426,6 +434,8 @@ int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
                             int64_t sector_num, int nb_sectors, int *pnum);
 
 bool bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
+int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
 bool bdrv_is_sg(BlockDriverState *bs);
 bool bdrv_is_inserted(BlockDriverState *bs);
 int bdrv_media_changed(BlockDriverState *bs);
@@ -501,7 +511,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
 void bdrv_img_create(const char *filename, const char *fmt,
                      const char *base_filename, const char *base_fmt,
                      char *options, uint64_t img_size, int flags,
-                     Error **errp, bool quiet);
+                     bool quiet, Error **errp);
 
 /* Returns the alignment in bytes that is required so that no bounce buffer
  * is required throughout the stack */
@@ -556,6 +566,11 @@ bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
 AioContext *bdrv_get_aio_context(BlockDriverState *bs);
 
 /**
+ * Transfer control to @co in the aio context of @bs
+ */
+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
+
+/**
  * bdrv_set_aio_context:
  *
  * Changes the #AioContext used for fd handlers, timers, and BHs by this
@@ -571,6 +586,22 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 
 /**
+ * bdrv_parent_drained_begin:
+ *
+ * Begin a quiesced section of all users of @bs. This is part of
+ * bdrv_drained_begin.
+ */
+void bdrv_parent_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_parent_drained_end:
+ *
+ * End a quiesced section of all users of @bs. This is part of
+ * bdrv_drained_end.
+ */
+void bdrv_parent_drained_end(BlockDriverState *bs);
+
+/**
  * bdrv_drained_begin:
  *
  * Begin a quiesced section for exclusive access to the BDS, by disabling
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 59400bd848..4f8cd29ae4 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -805,16 +805,16 @@ void commit_start(const char *job_id, BlockDriverState *bs,
  * a node name should be autogenerated.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
  * @auto_complete: Auto complete the job.
+ * @errp: Error object.
  *
  */
 void commit_active_start(const char *job_id, BlockDriverState *bs,
                          BlockDriverState *base, int creation_flags,
                          int64_t speed, BlockdevOnError on_error,
                          const char *filter_node_name,
-                         BlockCompletionFunc *cb, void *opaque, Error **errp,
-                         bool auto_complete);
+                         BlockCompletionFunc *cb, void *opaque,
+                         bool auto_complete, Error **errp);
 /*
  * mirror_start:
  * @job_id: The id of the newly-created job, or %NULL to use the
diff --git a/include/crypto/block.h b/include/crypto/block.h
index b6971de921..4a053a3ffa 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h
@@ -30,23 +30,23 @@ typedef struct QCryptoBlock QCryptoBlock;
  * and QCryptoBlockOpenOptions in qapi/crypto.json */
 
 typedef ssize_t (*QCryptoBlockReadFunc)(QCryptoBlock *block,
+                                        void *opaque,
                                         size_t offset,
                                         uint8_t *buf,
                                         size_t buflen,
-                                        Error **errp,
-                                        void *opaque);
+                                        Error **errp);
 
 typedef ssize_t (*QCryptoBlockInitFunc)(QCryptoBlock *block,
+                                        void *opaque,
                                         size_t headerlen,
-                                        Error **errp,
-                                        void *opaque);
+                                        Error **errp);
 
 typedef ssize_t (*QCryptoBlockWriteFunc)(QCryptoBlock *block,
+                                         void *opaque,
                                          size_t offset,
                                          const uint8_t *buf,
                                          size_t buflen,
-                                         Error **errp,
-                                         void *opaque);
+                                         Error **errp);
 
 /**
  * qcrypto_block_has_format:
diff --git a/include/exec/memory.h b/include/exec/memory.h
index e39256ad03..99e0f54d86 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -55,6 +55,8 @@ typedef enum {
     IOMMU_RW   = 3,
 } IOMMUAccessFlags;
 
+#define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? IOMMU_WO : 0))
+
 struct IOMMUTLBEntry {
     AddressSpace    *target_as;
     hwaddr           iova;
@@ -77,13 +79,30 @@ typedef enum {
 
 #define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP)
 
+struct IOMMUNotifier;
+typedef void (*IOMMUNotify)(struct IOMMUNotifier *notifier,
+                            IOMMUTLBEntry *data);
+
 struct IOMMUNotifier {
-    void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data);
+    IOMMUNotify notify;
     IOMMUNotifierFlag notifier_flags;
+    /* Notify for address space range start <= addr <= end */
+    hwaddr start;
+    hwaddr end;
     QLIST_ENTRY(IOMMUNotifier) node;
 };
 typedef struct IOMMUNotifier IOMMUNotifier;
 
+static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
+                                       IOMMUNotifierFlag flags,
+                                       hwaddr start, hwaddr end)
+{
+    n->notify = fn;
+    n->notifier_flags = flags;
+    n->start = start;
+    n->end = end;
+}
+
 /* New-style MMIO accessors can indicate that the transaction failed.
  * A zero (MEMTX_OK) response means success; anything else is a failure
  * of some kind. The memory subsystem will bitwise-OR together results
@@ -174,6 +193,8 @@ struct MemoryRegionIOMMUOps {
     void (*notify_flag_changed)(MemoryRegion *iommu,
                                 IOMMUNotifierFlag old_flags,
                                 IOMMUNotifierFlag new_flags);
+    /* Set this up to provide customized IOMMU replay function */
+    void (*replay)(MemoryRegion *iommu, IOMMUNotifier *notifier);
 };
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -222,6 +243,9 @@ struct MemoryRegion {
     IOMMUNotifierFlag iommu_notify_flags;
 };
 
+#define IOMMU_NOTIFIER_FOREACH(n, mr) \
+    QLIST_FOREACH((n), &(mr)->iommu_notify, node)
+
 /**
  * MemoryListener: callbacks structure for updates to the physical memory map
  *
@@ -668,6 +692,21 @@ void memory_region_notify_iommu(MemoryRegion *mr,
                                 IOMMUTLBEntry entry);
 
 /**
+ * memory_region_notify_one: notify a change in an IOMMU translation
+ *                           entry to a single notifier
+ *
+ * This works just like memory_region_notify_iommu(), but it only
+ * notifies a specific notifier, not all of them.
+ *
+ * @notifier: the notifier to be notified
+ * @entry: the new entry in the IOMMU translation table.  The entry
+ *         replaces all old entries for the same virtual I/O address range.
+ *         Deleted entries have .@perm == 0.
+ */
+void memory_region_notify_one(IOMMUNotifier *notifier,
+                              IOMMUTLBEntry *entry);
+
+/**
  * memory_region_register_iommu_notifier: register a notifier for changes to
  * IOMMU translation entries.
  *
@@ -693,6 +732,14 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
                                 bool is_write);
 
 /**
+ * memory_region_iommu_replay_all: replay existing IOMMU translations
+ * to all the notifiers registered.
+ *
+ * @mr: the memory region to observe
+ */
+void memory_region_iommu_replay_all(MemoryRegion *mr);
+
+/**
  * memory_region_unregister_iommu_notifier: unregister a notifier for
  * changes to IOMMU translation entries.
  *
@@ -871,6 +918,53 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
  */
 bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
                                         hwaddr size, unsigned client);
+
+/**
+ * memory_region_snapshot_and_clear_dirty: Get a snapshot of the dirty
+ *                                         bitmap and clear it.
+ *
+ * Creates a snapshot of the dirty bitmap, clears the dirty bitmap and
+ * returns the snapshot.  The snapshot can then be used to query dirty
+ * status, using memory_region_snapshot_get_dirty.  Unlike
+ * memory_region_test_and_clear_dirty this allows to query the same
+ * page multiple times, which is especially useful for display updates
+ * where the scanlines often are not page aligned.
+ *
+ * The dirty bitmap region which gets copyed into the snapshot (and
+ * cleared afterwards) can be larger than requested.  The boundaries
+ * are rounded up/down so complete bitmap longs (covering 64 pages on
+ * 64bit hosts) can be copied over into the bitmap snapshot.  Which
+ * isn't a problem for display updates as the extra pages are outside
+ * the visible area, and in case the visible area changes a full
+ * display redraw is due anyway.  Should other use cases for this
+ * function emerge we might have to revisit this implementation
+ * detail.
+ *
+ * Use g_free to release DirtyBitmapSnapshot.
+ *
+ * @mr: the memory region being queried.
+ * @addr: the address (relative to the start of the region) being queried.
+ * @size: the size of the range being queried.
+ * @client: the user of the logging information; typically %DIRTY_MEMORY_VGA.
+ */
+DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
+                                                            hwaddr addr,
+                                                            hwaddr size,
+                                                            unsigned client);
+
+/**
+ * memory_region_snapshot_get_dirty: Check whether a range of bytes is dirty
+ *                                   in the specified dirty bitmap snapshot.
+ *
+ * @mr: the memory region being queried.
+ * @snap: the dirty bitmap snapshot
+ * @addr: the address (relative to the start of the region) being queried.
+ * @size: the size of the range being queried.
+ */
+bool memory_region_snapshot_get_dirty(MemoryRegion *mr,
+                                      DirtyBitmapSnapshot *snap,
+                                      hwaddr addr, hwaddr size);
+
 /**
  * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with
  *                                  any external TLBs (e.g. kvm)
@@ -1426,13 +1520,11 @@ void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val);
 
 struct MemoryRegionCache {
     hwaddr xlat;
-    void *ptr;
     hwaddr len;
-    MemoryRegion *mr;
-    bool is_write;
+    AddressSpace *as;
 };
 
-#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .mr = NULL })
+#define MEMORY_REGION_CACHE_INVALID ((MemoryRegionCache) { .as = NULL })
 
 /* address_space_cache_init: prepare for repeated access to a physical
  * memory region
@@ -1688,7 +1780,7 @@ address_space_read_cached(MemoryRegionCache *cache, hwaddr addr,
                           void *buf, int len)
 {
     assert(addr < cache->len && len <= cache->len - addr);
-    memcpy(buf, cache->ptr + addr, len);
+    address_space_read(cache->as, cache->xlat + addr, MEMTXATTRS_UNSPECIFIED, buf, len);
 }
 
 /**
@@ -1704,7 +1796,7 @@ address_space_write_cached(MemoryRegionCache *cache, hwaddr addr,
                            void *buf, int len)
 {
     assert(addr < cache->len && len <= cache->len - addr);
-    memcpy(cache->ptr + addr, buf, len);
+    address_space_write(cache->as, cache->xlat + addr, MEMTXATTRS_UNSPECIFIED, buf, len);
 }
 
 #endif
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 29647303b0..dbe2f08d47 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -53,7 +53,7 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
 }
 
 long qemu_getrampagesize(void);
-ram_addr_t last_ram_offset(void);
+unsigned long last_ram_page(void);
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp);
@@ -343,6 +343,13 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                               ram_addr_t length,
                                               unsigned client);
 
+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+    (ram_addr_t start, ram_addr_t length, unsigned client);
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+                                            ram_addr_t start,
+                                            ram_addr_t length);
+
 static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                          ram_addr_t length)
 {
@@ -354,11 +361,13 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
 
 static inline
 uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
+                                               RAMBlock *rb,
                                                ram_addr_t start,
                                                ram_addr_t length,
-                                               int64_t *real_dirty_pages)
+                                               uint64_t *real_dirty_pages)
 {
     ram_addr_t addr;
+    start = rb->offset + start;
     unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
     uint64_t num_dirty = 0;
 
@@ -411,7 +420,5 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
 
     return num_dirty;
 }
-
-void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
 #endif
 #endif
diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h
index db7fa0e633..7beb9592fb 100644
--- a/include/hw/acpi/vmgenid.h
+++ b/include/hw/acpi/vmgenid.h
@@ -21,8 +21,10 @@ typedef struct VmGenIdState {
     DeviceClass parent_obj;
     QemuUUID guid;                /* The 128-bit GUID seen by the guest */
     uint8_t vmgenid_addr_le[8];   /* Address of the GUID (little-endian) */
+    bool write_pointer_available;
 } VmGenIdState;
 
+/* returns NULL unless there is exactly one device */
 static inline Object *find_vmgenid_dev(void)
 {
     return object_resolve_path_type("", VMGENID_DEVICE, NULL);
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index dbec0c1598..4c5fc66a1e 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -20,6 +20,7 @@
 #include "hw/i2c/aspeed_i2c.h"
 #include "hw/ssi/aspeed_smc.h"
 #include "hw/watchdog/wdt_aspeed.h"
+#include "hw/net/ftgmac100.h"
 
 #define ASPEED_SPIS_NUM  2
 
@@ -39,6 +40,7 @@ typedef struct AspeedSoCState {
     AspeedSMCState spi[ASPEED_SPIS_NUM];
     AspeedSDMCState sdmc;
     AspeedWDTState wdt;
+    FTGMAC100State ftgmac100;
 } AspeedSoCState;
 
 #define TYPE_ASPEED_SOC "aspeed-soc"
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 269d0ba399..31d9c72fb0 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -135,6 +135,7 @@ struct MachineClass {
     bool rom_file_has_mr;
     int minimum_page_bits;
     bool has_hotpluggable_cpus;
+    int numa_mem_align_shift;
 
     HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                            DeviceState *dev);
diff --git a/include/hw/compat.h b/include/hw/compat.h
index fc8c3e0600..846b90eb67 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -1,6 +1,9 @@
 #ifndef HW_COMPAT_H
 #define HW_COMPAT_H
 
+#define HW_COMPAT_2_9 \
+    /* empty */
+
 #define HW_COMPAT_2_8 \
     {\
         .driver   = "fw_cfg_mem",\
@@ -131,6 +134,10 @@
         .driver   = "fw_cfg_io",\
         .property = "dma_enabled",\
         .value    = "off",\
+    },{\
+        .driver   = "vmgenid",\
+        .property = "x-write-pointer-available",\
+        .value    = "off",\
     },
 
 #define HW_COMPAT_2_3 \
diff --git a/include/hw/devices.h b/include/hw/devices.h
index 7475b714de..861ddea8af 100644
--- a/include/hw/devices.h
+++ b/include/hw/devices.h
@@ -62,9 +62,4 @@ void tc6393xb_gpio_out_set(TC6393xbState *s, int line,
 qemu_irq *tc6393xb_gpio_in_get(TC6393xbState *s);
 qemu_irq tc6393xb_l3v_get(TC6393xbState *s);
 
-/* sm501.c */
-void sm501_init(struct MemoryRegion *address_space_mem, uint32_t base,
-                uint32_t local_mem_bytes, qemu_irq irq,
-                Chardev *chr);
-
 #endif
diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h
index 20ad28c95b..1209eb483a 100644
--- a/include/hw/i386/apic_internal.h
+++ b/include/hw/i386/apic_internal.h
@@ -189,8 +189,6 @@ struct APICCommonState {
     DeviceState *vapic;
     hwaddr vapic_paddr; /* note: persistence via kvmvapic */
     bool legacy_instance_id;
-
-    int apic_irq_delivered; /* for saving static variable */
 };
 
 typedef struct VAPICState {
diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h
index 18dcca7ebc..673d13d28f 100644
--- a/include/hw/i386/ich9.h
+++ b/include/hw/i386/ich9.h
@@ -21,7 +21,6 @@ void ich9_lpc_pm_init(PCIDevice *pci_lpc, bool smm_enabled);
 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base);
 
 void ich9_generate_smi(void);
-void ich9_generate_nmi(void);
 
 #define ICH9_CC_SIZE (16 * 1024) /* 16KB. Chipset configuration registers */
 
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index fe645aa93a..3e51876b75 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -63,6 +63,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDIrq VTDIrq;
 typedef struct VTD_MSIMessage VTD_MSIMessage;
+typedef struct IntelIOMMUNotifierNode IntelIOMMUNotifierNode;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -83,6 +84,8 @@ struct VTDAddressSpace {
     uint8_t devfn;
     AddressSpace as;
     MemoryRegion iommu;
+    MemoryRegion root;
+    MemoryRegion sys_alias;
     MemoryRegion iommu_ir;      /* Interrupt region: 0xfeeXXXXX */
     IntelIOMMUState *iommu_state;
     VTDContextCacheEntry context_cache_entry;
@@ -247,6 +250,11 @@ struct VTD_MSIMessage {
 /* When IR is enabled, all MSI/MSI-X data bits should be zero */
 #define VTD_IR_MSI_DATA          (0)
 
+struct IntelIOMMUNotifierNode {
+    VTDAddressSpace *vtd_as;
+    QLIST_ENTRY(IntelIOMMUNotifierNode) next;
+};
+
 /* The iommu (DMAR) device state struct */
 struct IntelIOMMUState {
     X86IOMMUState x86_iommu;
@@ -284,6 +292,8 @@ struct IntelIOMMUState {
     MemoryRegionIOMMUOps iommu_ops;
     GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
     VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
+    /* list of registered notifiers */
+    QLIST_HEAD(, IntelIOMMUNotifierNode) notifiers_list;
 
     /* interrupt remapping */
     bool intr_enabled;              /* Whether guest enabled IR */
diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h
index c469ffe69b..35de622063 100644
--- a/include/hw/net/cadence_gem.h
+++ b/include/hw/net/cadence_gem.h
@@ -50,6 +50,7 @@ typedef struct CadenceGEMState {
     uint8_t num_priority_queues;
     uint8_t num_type1_screeners;
     uint8_t num_type2_screeners;
+    uint32_t revision;
 
     /* GEM registers backing store */
     uint32_t regs[CADENCE_GEM_MAXREG];
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
new file mode 100644
index 0000000000..d9bc589fbf
--- /dev/null
+++ b/include/hw/net/ftgmac100.h
@@ -0,0 +1,64 @@
+/*
+ * Faraday FTGMAC100 Gigabit Ethernet
+ *
+ * Copyright (C) 2016-2017, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef FTGMAC100_H
+#define FTGMAC100_H
+
+#define TYPE_FTGMAC100 "ftgmac100"
+#define FTGMAC100(obj) OBJECT_CHECK(FTGMAC100State, (obj), TYPE_FTGMAC100)
+
+#include "hw/sysbus.h"
+#include "net/net.h"
+
+typedef struct FTGMAC100State {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    NICState *nic;
+    NICConf conf;
+    qemu_irq irq;
+    MemoryRegion iomem;
+
+    uint8_t *frame;
+
+    uint32_t irq_state;
+    uint32_t isr;
+    uint32_t ier;
+    uint32_t rx_enabled;
+    uint32_t rx_ring;
+    uint32_t rx_descriptor;
+    uint32_t tx_ring;
+    uint32_t tx_descriptor;
+    uint32_t math[2];
+    uint32_t rbsr;
+    uint32_t itc;
+    uint32_t aptcr;
+    uint32_t dblac;
+    uint32_t revr;
+    uint32_t fear1;
+    uint32_t tpafcr;
+    uint32_t maccr;
+    uint32_t phycr;
+    uint32_t phydata;
+    uint32_t fcr;
+
+
+    uint32_t phy_status;
+    uint32_t phy_control;
+    uint32_t phy_advertise;
+    uint32_t phy_int;
+    uint32_t phy_int_mask;
+
+    bool aspeed;
+    uint32_t txdes0_edotr;
+    uint32_t rxdes0_edorr;
+} FTGMAC100State;
+
+#endif
diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h
index 9fdd7bbe75..6ce48a6d78 100644
--- a/include/hw/net/mii.h
+++ b/include/hw/net/mii.h
@@ -22,13 +22,20 @@
 #define MII_H
 
 /* PHY registers */
-#define MII_BMCR            0
-#define MII_BMSR            1
-#define MII_PHYID1          2
-#define MII_PHYID2          3
-#define MII_ANAR            4
-#define MII_ANLPAR          5
-#define MII_ANER            6
+#define MII_BMCR            0  /* Basic mode control register */
+#define MII_BMSR            1  /* Basic mode status register */
+#define MII_PHYID1          2  /* ID register 1 */
+#define MII_PHYID2          3  /* ID register 2 */
+#define MII_ANAR            4  /* Autonegotiation advertisement */
+#define MII_ANLPAR          5  /* Autonegotiation lnk partner abilities */
+#define MII_ANER            6  /* Autonegotiation expansion */
+#define MII_ANNP            7  /* Autonegotiation next page */
+#define MII_ANLPRNP         8  /* Autonegotiation link partner rx next page */
+#define MII_CTRL1000        9  /* 1000BASE-T control */
+#define MII_STAT1000        10 /* 1000BASE-T status */
+#define MII_MDDACR          13 /* MMD access control */
+#define MII_MDDAADR         14 /* MMD access address data */
+#define MII_EXTSTAT         15 /* Extended Status */
 #define MII_NSR             16
 #define MII_LBREMR          17
 #define MII_REC             18
@@ -38,19 +45,33 @@
 /* PHY registers fields */
 #define MII_BMCR_RESET      (1 << 15)
 #define MII_BMCR_LOOPBACK   (1 << 14)
-#define MII_BMCR_SPEED      (1 << 13)
-#define MII_BMCR_AUTOEN     (1 << 12)
-#define MII_BMCR_FD         (1 << 8)
+#define MII_BMCR_SPEED100   (1 << 13)  /* LSB of Speed (100) */
+#define MII_BMCR_SPEED      MII_BMCR_SPEED100
+#define MII_BMCR_AUTOEN     (1 << 12) /* Autonegotiation enable */
+#define MII_BMCR_PDOWN      (1 << 11) /* Enable low power state */
+#define MII_BMCR_ISOLATE    (1 << 10) /* Isolate data paths from MII */
+#define MII_BMCR_ANRESTART  (1 << 9)  /* Auto negotiation restart */
+#define MII_BMCR_FD         (1 << 8)  /* Set duplex mode */
+#define MII_BMCR_CTST       (1 << 7)  /* Collision test */
+#define MII_BMCR_SPEED1000  (1 << 6)  /* MSB of Speed (1000) */
 
-#define MII_BMSR_100TX_FD   (1 << 14)
-#define MII_BMSR_100TX_HD   (1 << 13)
-#define MII_BMSR_10T_FD     (1 << 12)
-#define MII_BMSR_10T_HD     (1 << 11)
-#define MII_BMSR_MFPS       (1 << 6)
-#define MII_BMSR_AN_COMP    (1 << 5)
-#define MII_BMSR_AUTONEG    (1 << 3)
-#define MII_BMSR_LINK_ST    (1 << 2)
+#define MII_BMSR_100TX_FD   (1 << 14) /* Can do 100mbps, full-duplex */
+#define MII_BMSR_100TX_HD   (1 << 13) /* Can do 100mbps, half-duplex */
+#define MII_BMSR_10T_FD     (1 << 12) /* Can do 10mbps, full-duplex */
+#define MII_BMSR_10T_HD     (1 << 11) /* Can do 10mbps, half-duplex */
+#define MII_BMSR_100T2_FD   (1 << 10) /* Can do 100mbps T2, full-duplex */
+#define MII_BMSR_100T2_HD   (1 << 9)  /* Can do 100mbps T2, half-duplex */
+#define MII_BMSR_EXTSTAT    (1 << 8)  /* Extended status in register 15 */
+#define MII_BMSR_MFPS       (1 << 6)  /* MII Frame Preamble Suppression */
+#define MII_BMSR_AN_COMP    (1 << 5)  /* Auto-negotiation complete */
+#define MII_BMSR_RFAULT     (1 << 4)  /* Remote fault */
+#define MII_BMSR_AUTONEG    (1 << 3)  /* Able to do auto-negotiation */
+#define MII_BMSR_LINK_ST    (1 << 2)  /* Link status */
+#define MII_BMSR_JABBER     (1 << 1)  /* Jabber detected */
+#define MII_BMSR_EXTCAP     (1 << 0)  /* Ext-reg capability */
 
+#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymetric pause */
+#define MII_ANAR_PAUSE      (1 << 10) /* Try for pause */
 #define MII_ANAR_TXFD       (1 << 8)
 #define MII_ANAR_TX         (1 << 7)
 #define MII_ANAR_10FD       (1 << 6)
@@ -58,17 +79,31 @@
 #define MII_ANAR_CSMACD     (1 << 0)
 
 #define MII_ANLPAR_ACK      (1 << 14)
+#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */
+#define MII_ANLPAR_PAUSE    (1 << 10) /* can pause */
 #define MII_ANLPAR_TXFD     (1 << 8)
 #define MII_ANLPAR_TX       (1 << 7)
 #define MII_ANLPAR_10FD     (1 << 6)
 #define MII_ANLPAR_10       (1 << 5)
 #define MII_ANLPAR_CSMACD   (1 << 0)
 
+#define MII_ANER_NWAY       (1 << 0) /* Can do N-way auto-nego */
+
+#define MII_CTRL1000_FULL   (1 << 9)  /* 1000BASE-T full duplex */
+#define MII_CTRL1000_HALF   (1 << 8)  /* 1000BASE-T half duplex */
+
+#define MII_STAT1000_FULL   (1 << 11) /* 1000BASE-T full duplex */
+#define MII_STAT1000_HALF   (1 << 10) /* 1000BASE-T half duplex */
+
 /* List of vendor identifiers */
 /* RealTek 8201 */
 #define RTL8201CP_PHYID1    0x0000
 #define RTL8201CP_PHYID2    0x8201
 
+/* RealTek 8211E */
+#define RTL8211E_PHYID1     0x001c
+#define RTL8211E_PHYID2     0xc915
+
 /* National Semiconductor DP83848 */
 #define DP83848_PHYID1      0x2000
 #define DP83848_PHYID2      0x5c90
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index d22ad8dd3b..3752ddc93a 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -207,6 +207,9 @@
 
 #define PCI_VENDOR_ID_MARVELL            0x11ab
 
+#define PCI_VENDOR_ID_SILICON_MOTION     0x126f
+#define PCI_DEVICE_ID_SM501              0x0501
+
 #define PCI_VENDOR_ID_ENSONIQ            0x1274
 #define PCI_DEVICE_ID_ENSONIQ_ES1370     0x5000
 
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index fa531d5c26..5524247cdc 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -154,6 +154,7 @@ typedef struct sPAPRDRConnector {
     bool awaiting_release;
     bool signalled;
     bool awaiting_allocation;
+    bool awaiting_allocation_skippable;
 
     /* device pointer, via link property */
     DeviceState *dev;
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index b44b476765..4bf86b0ad8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -113,19 +113,6 @@ typedef struct DeviceClass {
      * TODO remove once we're there
      */
     bool cannot_instantiate_with_device_add_yet;
-    /*
-     * Does this device model survive object_unref(object_new(TNAME))?
-     * All device models should, and this flag shouldn't exist.  Some
-     * devices crash in object_new(), some crash or hang in
-     * object_unref().  Makes introspecting properties with
-     * qmp_device_list_properties() dangerous.  Bad, because it's used
-     * by -device FOO,help.  This flag serves to protect that code.
-     * It should never be set without a comment explaining why it is
-     * set.
-     * TODO remove once we're there
-     */
-    bool cannot_destroy_with_object_finalize_yet;
-
     bool hotpluggable;
 
     /* callbacks */
@@ -386,7 +373,8 @@ Object *qdev_get_machine(void);
 /* FIXME: make this a link<> */
 void qdev_set_parent_bus(DeviceState *dev, BusState *bus);
 
-extern int qdev_hotplug;
+extern bool qdev_hotplug;
+extern bool qdev_hot_removed;
 
 char *qdev_get_dev_path(DeviceState *dev);
 
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 7ac315331a..1d69fa7a8f 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -188,7 +188,8 @@ void qdev_prop_set_chr(DeviceState *dev, const char *name, Chardev *value);
 void qdev_prop_set_netdev(DeviceState *dev, const char *name, NetClientState *value);
 void qdev_prop_set_drive(DeviceState *dev, const char *name,
                          BlockBackend *value, Error **errp);
-void qdev_prop_set_macaddr(DeviceState *dev, const char *name, uint8_t *value);
+void qdev_prop_set_macaddr(DeviceState *dev, const char *name,
+                           const uint8_t *value);
 void qdev_prop_set_enum(DeviceState *dev, const char *name, int value);
 /* FIXME: Remove opaque pointer properties.  */
 void qdev_prop_set_ptr(DeviceState *dev, const char *name, void *value);
diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index c96c862057..f1f0d7f07a 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -23,6 +23,8 @@
 #define MAX_CSSID 255
 #define MAX_CHPID 255
 
+#define MAX_ISC 7
+
 #define MAX_CIWS 62
 
 #define VIRTUAL_CSSID 0xfe
@@ -124,9 +126,15 @@ void css_generate_css_crws(uint8_t cssid);
 void css_clear_sei_pending(void);
 void css_adapter_interrupt(uint8_t isc);
 
-#define CSS_IO_ADAPTER_VIRTIO 1
-int css_register_io_adapter(uint8_t type, uint8_t isc, bool swap,
-                            bool maskable, uint32_t *id);
+typedef enum {
+    CSS_IO_ADAPTER_VIRTIO = 0,
+    CSS_IO_ADAPTER_PCI = 1,
+    CSS_IO_ADAPTER_TYPE_NUMS,
+} CssIoAdapterType;
+
+uint32_t css_get_adapter_id(CssIoAdapterType type, uint8_t isc);
+void css_register_io_adapters(CssIoAdapterType type, bool swap, bool maskable,
+                              Error **errp);
 
 #ifndef CONFIG_USER_ONLY
 SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
@@ -172,6 +180,11 @@ extern PropertyInfo css_devid_propinfo;
 #define DEFINE_PROP_CSS_DEV_ID(_n, _s, _f) \
     DEFINE_PROP(_n, _s, _f, css_devid_propinfo, CssDevId)
 
+extern PropertyInfo css_devid_ro_propinfo;
+
+#define DEFINE_PROP_CSS_DEV_ID_RO(_n, _s, _f) \
+    DEFINE_PROP(_n, _s, _f, css_devid_ro_propinfo, CssDevId)
+
 /**
  * Create a subchannel for the given bus id.
  *
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 52f633ec89..a45032163d 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -37,10 +37,20 @@ struct vhost_log {
     vhost_log_chunk_t *log;
 };
 
+struct vhost_dev;
+struct vhost_iommu {
+    struct vhost_dev *hdev;
+    MemoryRegion *mr;
+    hwaddr iommu_offset;
+    IOMMUNotifier n;
+    QLIST_ENTRY(vhost_iommu) iommu_next;
+};
+
 struct vhost_memory;
 struct vhost_dev {
     VirtIODevice *vdev;
     MemoryListener memory_listener;
+    MemoryListener iommu_listener;
     struct vhost_memory *mem;
     int n_mem_sections;
     MemoryRegionSection *mem_sections;
@@ -64,6 +74,7 @@ struct vhost_dev {
     void *opaque;
     struct vhost_log *log;
     QLIST_ENTRY(vhost_dev) entry;
+    QLIST_HEAD(, vhost_iommu) iommu_list;
     IOMMUNotifier n;
 };
 
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index f3a98a3261..f3ffdceca4 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -72,6 +72,8 @@ struct virtio_gpu_conf {
     uint64_t max_hostmem;
     uint32_t max_outputs;
     uint32_t flags;
+    uint32_t xres;
+    uint32_t yres;
 };
 
 struct virtio_gpu_ctrl_command {
diff --git a/include/hw/virtio/virtio-input.h b/include/hw/virtio/virtio-input.h
index 55db31087a..91df57eca4 100644
--- a/include/hw/virtio/virtio-input.h
+++ b/include/hw/virtio/virtio-input.h
@@ -62,7 +62,10 @@ struct VirtIOInput {
     VirtQueue                         *evt, *sts;
     char                              *serial;
 
-    virtio_input_event                *queue;
+    struct {
+        virtio_input_event event;
+        VirtQueueElement *elem;
+    }                                 *queue;
     uint32_t                          qindex, qsize;
 
     bool                              active;
diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h
index 2d40abdbdb..922dce7cac 100644
--- a/include/hw/virtio/virtio-rng.h
+++ b/include/hw/virtio/virtio-rng.h
@@ -45,6 +45,8 @@ typedef struct VirtIORNG {
     QEMUTimer *rate_limit_timer;
     int64_t quota_remaining;
     bool activate_timer;
+
+    VMChangeStateEntry *vmstate;
 } VirtIORNG;
 
 #endif
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index f536f77e68..8c8453cf19 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -121,9 +121,25 @@ typedef struct VirtIOSCSIReq {
     } req;
 } VirtIOSCSIReq;
 
-void virtio_scsi_common_realize(DeviceState *dev, Error **errp,
-                                VirtIOHandleOutput ctrl, VirtIOHandleOutput evt,
-                                VirtIOHandleOutput cmd);
+static inline void virtio_scsi_acquire(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_acquire(s->ctx);
+    }
+}
+
+static inline void virtio_scsi_release(VirtIOSCSI *s)
+{
+    if (s->ctx) {
+        aio_context_release(s->ctx);
+    }
+}
+
+void virtio_scsi_common_realize(DeviceState *dev,
+                                VirtIOHandleOutput ctrl,
+                                VirtIOHandleOutput evt,
+                                VirtIOHandleOutput cmd,
+                                Error **errp);
 
 void virtio_scsi_common_unrealize(DeviceState *dev, Error **errp);
 bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 15efcf2057..7b6edbafd7 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -34,7 +34,7 @@ struct VirtQueue;
 static inline hwaddr vring_align(hwaddr addr,
                                              unsigned long align)
 {
-    return (addr + align - 1) & ~(align - 1);
+    return QEMU_ALIGN_UP(addr, align);
 }
 
 typedef struct VirtQueue VirtQueue;
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 5720c884f4..ba1a16cbc1 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -128,18 +128,6 @@ struct MigrationIncomingState {
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
 
-/*
- * An outstanding page request, on the source, having been received
- * and queued
- */
-struct MigrationSrcPageRequest {
-    RAMBlock *rb;
-    hwaddr    offset;
-    hwaddr    len;
-
-    QSIMPLEQ_ENTRY(MigrationSrcPageRequest) next_req;
-};
-
 struct MigrationState
 {
     size_t bytes_xfer;
@@ -166,14 +154,9 @@ struct MigrationState
     int64_t total_time;
     int64_t downtime;
     int64_t expected_downtime;
-    int64_t dirty_pages_rate;
-    int64_t dirty_bytes_rate;
     bool enabled_capabilities[MIGRATION_CAPABILITY__MAX];
     int64_t xbzrle_cache_size;
     int64_t setup_time;
-    int64_t dirty_sync_count;
-    /* Count of requests incoming from destination */
-    int64_t postcopy_requests;
 
     /* Flag set once the migration has been asked to enter postcopy */
     bool start_postcopy;
@@ -186,11 +169,6 @@ struct MigrationState
     /* Flag set once the migration thread called bdrv_inactivate_all */
     bool block_inactive;
 
-    /* Queue of outstanding page requests from the destination */
-    QemuMutex src_page_req_mutex;
-    QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
-    /* The RAMBlock used in the last src_page_request */
-    RAMBlock *last_req_rb;
     /* The semaphore is used to notify COLO thread that failover is finished */
     QemuSemaphore colo_exit_sem;
 
@@ -256,11 +234,11 @@ void remove_migration_state_change_notifier(Notifier *notify);
 MigrationState *migrate_init(const MigrationParams *params);
 bool migration_is_blocked(Error **errp);
 bool migration_in_setup(MigrationState *);
-bool migration_is_idle(MigrationState *s);
+bool migration_is_idle(void);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
 /* True if outgoing migration has entered postcopy phase */
-bool migration_in_postcopy(MigrationState *);
+bool migration_in_postcopy(void);
 /* ...and after the device transmission */
 bool migration_in_postcopy_after_devices(MigrationState *);
 MigrationState *migrate_get_current(void);
@@ -272,15 +250,14 @@ void migrate_decompress_threads_join(void);
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_transferred(void);
 uint64_t ram_bytes_total(void);
+uint64_t ram_dirty_sync_count(void);
+uint64_t ram_dirty_pages_rate(void);
+uint64_t ram_postcopy_requests(void);
 void free_xbzrle_decoded_buf(void);
 
 void acct_update_position(QEMUFile *f, size_t size, bool zero);
 
-uint64_t dup_mig_bytes_transferred(void);
 uint64_t dup_mig_pages_transferred(void);
-uint64_t skipped_mig_bytes_transferred(void);
-uint64_t skipped_mig_pages_transferred(void);
-uint64_t norm_mig_bytes_transferred(void);
 uint64_t norm_mig_pages_transferred(void);
 uint64_t xbzrle_mig_bytes_transferred(void);
 uint64_t xbzrle_mig_pages_transferred(void);
@@ -293,8 +270,7 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
 /* For outgoing discard bitmap */
 int ram_postcopy_send_discard_bitmap(MigrationState *ms);
 /* For incoming postcopy discard */
-int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
-                      uint64_t start, size_t length);
+int ram_discard_range(const char *block_name, uint64_t start, size_t length);
 int ram_postcopy_incoming_init(MigrationIncomingState *mis);
 void ram_postcopy_migrated_memory_release(MigrationState *ms);
 
@@ -377,9 +353,8 @@ void savevm_skip_configuration(void);
 int global_state_store(void);
 void global_state_store_running(void);
 
-void flush_page_queue(MigrationState *ms);
-int ram_save_queue_pages(MigrationState *ms, const char *rbname,
-                         ram_addr_t start, ram_addr_t len);
+void migration_page_queue_free(void);
+int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len);
 uint64_t ram_pagesize_summary(void);
 
 PostcopyState postcopy_state_get(void);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f2dbf8410a..dad3984c07 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -56,7 +56,8 @@ typedef struct SaveVMHandlers {
 
     /* This runs outside the iothread lock!  */
     int (*save_live_setup)(QEMUFile *f, void *opaque);
-    void (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size,
+    void (*save_live_pending)(QEMUFile *f, void *opaque,
+                              uint64_t threshold_size,
                               uint64_t *non_postcopiable_pending,
                               uint64_t *postcopiable_pending);
     LoadStateHandler *load_state;
diff --git a/include/net/eth.h b/include/net/eth.h
index afeb45be34..09054a506d 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -209,6 +209,7 @@ struct tcp_hdr {
 #define ETH_P_IPV6                (0x86dd)
 #define ETH_P_VLAN                (0x8100)
 #define ETH_P_DVLAN               (0x88a8)
+#define ETH_P_NCSI                (0x88f8)
 #define ETH_P_UNKNOWN             (0xffff)
 #define VLAN_VID_MASK             0x0fff
 #define IP_HEADER_VERSION_4       (4)
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 63ea2d0b1e..c318da12d7 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -220,6 +220,8 @@ void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
 bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr);
+void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
+                                  long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
                                          unsigned long size,
                                          unsigned long start,
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index e0ce9ffb28..18e610083a 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -24,17 +24,9 @@
 
 #define QEMU_NORETURN __attribute__ ((__noreturn__))
 
-#if QEMU_GNUC_PREREQ(3, 4)
 #define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
-#else
-#define QEMU_WARN_UNUSED_RESULT
-#endif
 
-#if QEMU_GNUC_PREREQ(4, 0)
 #define QEMU_SENTINEL __attribute__((sentinel))
-#else
-#define QEMU_SENTINEL
-#endif
 
 #if QEMU_GNUC_PREREQ(4, 3)
 #define QEMU_ARTIFICIAL __attribute__((always_inline, artificial))
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index e60beaff81..a4509bd977 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -77,6 +77,11 @@ void qemu_coroutine_enter(Coroutine *coroutine);
 void qemu_coroutine_enter_if_inactive(Coroutine *co);
 
 /**
+ * Transfer control to a coroutine and associate it with ctx
+ */
+void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co);
+
+/**
  * Transfer control back to a coroutine's caller
  *
  * This function does not return until the coroutine is re-entered using
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index a38be42253..95cf4f4163 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -115,37 +115,7 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
  */
 static inline int clz32(uint32_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return val ? __builtin_clz(val) : 32;
-#else
-    /* Binary search for the leading one bit.  */
-    int cnt = 0;
-
-    if (!(val & 0xFFFF0000U)) {
-        cnt += 16;
-        val <<= 16;
-    }
-    if (!(val & 0xFF000000U)) {
-        cnt += 8;
-        val <<= 8;
-    }
-    if (!(val & 0xF0000000U)) {
-        cnt += 4;
-        val <<= 4;
-    }
-    if (!(val & 0xC0000000U)) {
-        cnt += 2;
-        val <<= 2;
-    }
-    if (!(val & 0x80000000U)) {
-        cnt++;
-        val <<= 1;
-    }
-    if (!(val & 0x80000000U)) {
-        cnt++;
-    }
-    return cnt;
-#endif
 }
 
 /**
@@ -168,19 +138,7 @@ static inline int clo32(uint32_t val)
  */
 static inline int clz64(uint64_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return val ? __builtin_clzll(val) : 64;
-#else
-    int cnt = 0;
-
-    if (!(val >> 32)) {
-        cnt += 32;
-    } else {
-        val >>= 32;
-    }
-
-    return cnt + clz32(val);
-#endif
 }
 
 /**
@@ -203,39 +161,7 @@ static inline int clo64(uint64_t val)
  */
 static inline int ctz32(uint32_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return val ? __builtin_ctz(val) : 32;
-#else
-    /* Binary search for the trailing one bit.  */
-    int cnt;
-
-    cnt = 0;
-    if (!(val & 0x0000FFFFUL)) {
-        cnt += 16;
-        val >>= 16;
-    }
-    if (!(val & 0x000000FFUL)) {
-        cnt += 8;
-        val >>= 8;
-    }
-    if (!(val & 0x0000000FUL)) {
-        cnt += 4;
-        val >>= 4;
-    }
-    if (!(val & 0x00000003UL)) {
-        cnt += 2;
-        val >>= 2;
-    }
-    if (!(val & 0x00000001UL)) {
-        cnt++;
-        val >>= 1;
-    }
-    if (!(val & 0x00000001UL)) {
-        cnt++;
-    }
-
-    return cnt;
-#endif
 }
 
 /**
@@ -258,19 +184,7 @@ static inline int cto32(uint32_t val)
  */
 static inline int ctz64(uint64_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return val ? __builtin_ctzll(val) : 64;
-#else
-    int cnt;
-
-    cnt = 0;
-    if (!((uint32_t)val)) {
-        cnt += 32;
-        val >>= 32;
-    }
-
-    return cnt + ctz32(val);
-#endif
 }
 
 /**
@@ -322,15 +236,7 @@ static inline int clrsb64(uint64_t val)
  */
 static inline int ctpop8(uint8_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcount(val);
-#else
-    val = (val & 0x55) + ((val >> 1) & 0x55);
-    val = (val & 0x33) + ((val >> 2) & 0x33);
-    val = (val + (val >> 4)) & 0x0f;
-
-    return val;
-#endif
 }
 
 /**
@@ -339,16 +245,7 @@ static inline int ctpop8(uint8_t val)
  */
 static inline int ctpop16(uint16_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcount(val);
-#else
-    val = (val & 0x5555) + ((val >> 1) & 0x5555);
-    val = (val & 0x3333) + ((val >> 2) & 0x3333);
-    val = (val + (val >> 4)) & 0x0f0f;
-    val = (val + (val >> 8)) & 0x00ff;
-
-    return val;
-#endif
 }
 
 /**
@@ -357,16 +254,7 @@ static inline int ctpop16(uint16_t val)
  */
 static inline int ctpop32(uint32_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcount(val);
-#else
-    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
-    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
-    val = (val + (val >> 4)) & 0x0f0f0f0f;
-    val = (val * 0x01010101) >> 24;
-
-    return val;
-#endif
 }
 
 /**
@@ -375,16 +263,7 @@ static inline int ctpop32(uint32_t val)
  */
 static inline int ctpop64(uint64_t val)
 {
-#if QEMU_GNUC_PREREQ(3, 4)
     return __builtin_popcountll(val);
-#else
-    val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
-    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
-    val = (val + (val >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
-    val = (val * 0x0101010101010101ULL) >> 56;
-
-    return val;
-#endif
 }
 
 /**
diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 5f1bab9b3e..af285321b8 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -36,8 +36,9 @@ int inet_ai_family_from_address(InetSocketAddress *addr,
                                 Error **errp);
 InetSocketAddress *inet_parse(const char *str, Error **errp);
 int inet_connect(const char *str, Error **errp);
-int inet_connect_saddr(InetSocketAddress *saddr, Error **errp,
-                       NonBlockingConnectHandler *callback, void *opaque);
+int inet_connect_saddr(InetSocketAddress *saddr,
+                       NonBlockingConnectHandler *callback, void *opaque,
+                       Error **errp);
 
 NetworkAddressFamily inet_netfamily(int family);
 
@@ -45,8 +46,8 @@ int unix_listen(const char *path, char *ostr, int olen, Error **errp);
 int unix_connect(const char *path, Error **errp);
 
 SocketAddress *socket_parse(const char *str, Error **errp);
-int socket_connect(SocketAddress *addr, Error **errp,
-                   NonBlockingConnectHandler *callback, void *opaque);
+int socket_connect(SocketAddress *addr, NonBlockingConnectHandler *callback,
+                   void *opaque, Error **errp);
 int socket_listen(SocketAddress *addr, Error **errp);
 void socket_listen_cleanup(int fd, Error **errp);
 int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
@@ -119,4 +120,15 @@ SocketAddress *socket_remote_address(int fd, Error **errp);
  */
 char *socket_address_to_string(struct SocketAddress *addr, Error **errp);
 
+/**
+ * socket_address_crumple:
+ * @addr_flat: the socket address to crumple
+ *
+ * Convert SocketAddressFlat to SocketAddress.  Caller is responsible
+ * for freeing with qapi_free_SocketAddress().
+ *
+ * Returns: the argument converted to SocketAddress.
+ */
+SocketAddress *socket_address_crumple(SocketAddressFlat *addr_flat);
+
 #endif /* QEMU_SOCKETS_H */
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index 5fb6541ae9..4c4a261cf4 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -4,8 +4,7 @@
 #include <windows.h>
 
 struct QemuMutex {
-    CRITICAL_SECTION lock;
-    LONG owner;
+    SRWLOCK lock;
 };
 
 typedef struct QemuRecMutex QemuRecMutex;
@@ -19,9 +18,7 @@ int qemu_rec_mutex_trylock(QemuRecMutex *mutex);
 void qemu_rec_mutex_unlock(QemuRecMutex *mutex);
 
 struct QemuCond {
-    LONG waiters, target;
-    HANDLE sema;
-    HANDLE continue_event;
+    CONDITION_VARIABLE var;
 };
 
 struct QemuSemaphore {
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index e1742f2f3d..8a1eb74839 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -869,6 +869,7 @@ int64_t cpu_get_icount_raw(void);
 int64_t cpu_get_icount(void);
 int64_t cpu_get_clock(void);
 int64_t cpu_icount_to_ns(int64_t icount);
+void    cpu_update_icount(CPUState *cpu);
 
 /*******************************************/
 /* host CPU ticks (if available) */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index e95f28cfec..f08d327aec 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -23,6 +23,7 @@ typedef struct CPUAddressSpace CPUAddressSpace;
 typedef struct CPUState CPUState;
 typedef struct DeviceListener DeviceListener;
 typedef struct DeviceState DeviceState;
+typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
 typedef struct DisplayChangeListener DisplayChangeListener;
 typedef struct DisplayState DisplayState;
 typedef struct DisplaySurface DisplaySurface;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index c3292efe1c..5d10359c8f 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -332,6 +332,7 @@ struct CPUState {
     /* updates protected by BQL */
     uint32_t interrupt_request;
     int singlestep_enabled;
+    int64_t icount_budget;
     int64_t icount_extra;
     sigjmp_buf jmp_env;
 
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 096c17fce0..7462228ac1 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -58,6 +58,14 @@ typedef struct BlockDevOps {
      * Runs when the size changed (e.g. monitor command block_resize)
      */
     void (*resize_cb)(void *opaque);
+    /*
+     * Runs when the backend receives a drain request.
+     */
+    void (*drained_begin)(void *opaque);
+    /*
+     * Runs when the backend's last drain request ends.
+     */
+    void (*drained_end)(void *opaque);
 } BlockDevOps;
 
 /* This struct is embedded in (the private) BlockBackend struct and contains
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index ecae0cff19..ed6a437f4d 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -62,6 +62,7 @@ struct HostMemoryBackend {
     MemoryRegion mr;
 };
 
+bool host_memory_backend_mr_inited(HostMemoryBackend *backend);
 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend,
                                              Error **errp);
 
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 576c7ce640..16175f7295 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -67,7 +67,7 @@ int qemu_reset_requested_get(void);
 void qemu_system_killed(int signal, pid_t pid);
 void qemu_system_reset(bool report);
 void qemu_system_guest_panicked(GuestPanicInformation *info);
-size_t qemu_target_page_bits(void);
+size_t qemu_target_page_size(void);
 
 void qemu_add_exit_notifier(Notifier *notify);
 void qemu_remove_exit_notifier(Notifier *notify);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index f546c6830e..53386b7ba3 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -140,7 +140,7 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
     int fd;
 
     trace_qio_channel_socket_connect_sync(ioc, addr);
-    fd = socket_connect(addr, errp, NULL, NULL);
+    fd = socket_connect(addr, NULL, NULL, errp);
     if (fd < 0) {
         trace_qio_channel_socket_connect_fail(ioc);
         return -1;
@@ -331,16 +331,10 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
 {
     QIOChannelSocket *cioc;
 
-    cioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
-    cioc->fd = -1;
+    cioc = qio_channel_socket_new();
     cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
     cioc->localAddrLen = sizeof(ioc->localAddr);
 
-#ifdef WIN32
-    QIO_CHANNEL(cioc)->event = CreateEvent(NULL, FALSE, FALSE, NULL);
-#endif
-
-
  retry:
     trace_qio_channel_socket_accept(ioc);
     cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
diff --git a/io/dns-resolver.c b/io/dns-resolver.c
index 0ac6b23c02..759d1b40d7 100644
--- a/io/dns-resolver.c
+++ b/io/dns-resolver.c
@@ -158,6 +158,7 @@ int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver,
 
     case SOCKET_ADDRESS_KIND_UNIX:
     case SOCKET_ADDRESS_KIND_VSOCK:
+    case SOCKET_ADDRESS_KIND_FD:
         return qio_dns_resolver_lookup_sync_nop(resolver,
                                                 addr,
                                                 naddrs,
@@ -165,8 +166,7 @@ int qio_dns_resolver_lookup_sync(QIODNSResolver *resolver,
                                                 errp);
 
     default:
-        error_setg(errp, "Unknown socket address kind");
-        return -1;
+        abort();
     }
 }
 
diff --git a/memory.c b/memory.c
index 64b0a605ef..b727f5ec0e 100644
--- a/memory.c
+++ b/memory.c
@@ -906,12 +906,6 @@ void memory_region_transaction_begin(void)
     ++memory_region_transaction_depth;
 }
 
-static void memory_region_clear_pending(void)
-{
-    memory_region_update_pending = false;
-    ioeventfd_update_pending = false;
-}
-
 void memory_region_transaction_commit(void)
 {
     AddressSpace *as;
@@ -927,14 +921,14 @@ void memory_region_transaction_commit(void)
             QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                 address_space_update_topology(as);
             }
-
+            memory_region_update_pending = false;
             MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
         } else if (ioeventfd_update_pending) {
             QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
                 address_space_update_ioeventfds(as);
             }
+            ioeventfd_update_pending = false;
         }
-        memory_region_clear_pending();
    }
 }
 
@@ -1589,7 +1583,7 @@ static void memory_region_update_iommu_notify_flags(MemoryRegion *mr)
     IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE;
     IOMMUNotifier *iommu_notifier;
 
-    QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
+    IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) {
         flags |= iommu_notifier->notifier_flags;
     }
 
@@ -1612,6 +1606,7 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr,
 
     /* We need to register for at least one bitfield */
     assert(n->notifier_flags != IOMMU_NOTIFIER_NONE);
+    assert(n->start <= n->end);
     QLIST_INSERT_HEAD(&mr->iommu_notify, n, node);
     memory_region_update_iommu_notify_flags(mr);
 }
@@ -1631,6 +1626,12 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
     hwaddr addr, granularity;
     IOMMUTLBEntry iotlb;
 
+    /* If the IOMMU has its own replay callback, override */
+    if (mr->iommu_ops->replay) {
+        mr->iommu_ops->replay(mr, n);
+        return;
+    }
+
     granularity = memory_region_iommu_get_min_page_size(mr);
 
     for (addr = 0; addr < memory_region_size(mr); addr += granularity) {
@@ -1647,6 +1648,15 @@ void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n,
     }
 }
 
+void memory_region_iommu_replay_all(MemoryRegion *mr)
+{
+    IOMMUNotifier *notifier;
+
+    IOMMU_NOTIFIER_FOREACH(notifier, mr) {
+        memory_region_iommu_replay(mr, notifier, false);
+    }
+}
+
 void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
                                              IOMMUNotifier *n)
 {
@@ -1658,24 +1668,40 @@ void memory_region_unregister_iommu_notifier(MemoryRegion *mr,
     memory_region_update_iommu_notify_flags(mr);
 }
 
-void memory_region_notify_iommu(MemoryRegion *mr,
-                                IOMMUTLBEntry entry)
+void memory_region_notify_one(IOMMUNotifier *notifier,
+                              IOMMUTLBEntry *entry)
 {
-    IOMMUNotifier *iommu_notifier;
     IOMMUNotifierFlag request_flags;
 
-    assert(memory_region_is_iommu(mr));
+    /*
+     * Skip the notification if the notification does not overlap
+     * with registered range.
+     */
+    if (notifier->start > entry->iova + entry->addr_mask + 1 ||
+        notifier->end < entry->iova) {
+        return;
+    }
 
-    if (entry.perm & IOMMU_RW) {
+    if (entry->perm & IOMMU_RW) {
         request_flags = IOMMU_NOTIFIER_MAP;
     } else {
         request_flags = IOMMU_NOTIFIER_UNMAP;
     }
 
-    QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) {
-        if (iommu_notifier->notifier_flags & request_flags) {
-            iommu_notifier->notify(iommu_notifier, &entry);
-        }
+    if (notifier->notifier_flags & request_flags) {
+        notifier->notify(notifier, entry);
+    }
+}
+
+void memory_region_notify_iommu(MemoryRegion *mr,
+                                IOMMUTLBEntry entry)
+{
+    IOMMUNotifier *iommu_notifier;
+
+    assert(memory_region_is_iommu(mr));
+
+    IOMMU_NOTIFIER_FOREACH(iommu_notifier, mr) {
+        memory_region_notify_one(iommu_notifier, &entry);
     }
 }
 
@@ -1722,6 +1748,23 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr,
                 memory_region_get_ram_addr(mr) + addr, size, client);
 }
 
+DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
+                                                            hwaddr addr,
+                                                            hwaddr size,
+                                                            unsigned client)
+{
+    assert(mr->ram_block);
+    return cpu_physical_memory_snapshot_and_clear_dirty(
+                memory_region_get_ram_addr(mr) + addr, size, client);
+}
+
+bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap,
+                                      hwaddr addr, hwaddr size)
+{
+    assert(mr->ram_block);
+    return cpu_physical_memory_snapshot_get_dirty(snap,
+                memory_region_get_ram_addr(mr) + addr, size);
+}
 
 void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 {
diff --git a/migration/block.c b/migration/block.c
index 7734ff728a..060087fa32 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
     int64_t total_sectors = 0;
     int nr_sectors;
     int ret;
+    BlockDriverInfo bdi;
+    int cluster_size = BLOCK_SIZE;
 
     do {
         addr = qemu_get_be64(f);
@@ -919,6 +921,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                     error_report_err(local_err);
                     return -EINVAL;
                 }
+
+                ret = bdrv_get_info(blk_bs(blk), &bdi);
+                if (ret == 0 && bdi.cluster_size > 0 &&
+                    bdi.cluster_size <= BLOCK_SIZE &&
+                    BLOCK_SIZE % bdi.cluster_size == 0) {
+                    cluster_size = bdi.cluster_size;
+                } else {
+                    cluster_size = BLOCK_SIZE;
+                }
             }
 
             if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
@@ -932,10 +943,30 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                                         nr_sectors * BDRV_SECTOR_SIZE,
                                         BDRV_REQ_MAY_UNMAP);
             } else {
+                int i;
+                int64_t cur_addr;
+                uint8_t *cur_buf;
+
                 buf = g_malloc(BLOCK_SIZE);
                 qemu_get_buffer(f, buf, BLOCK_SIZE);
-                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
-                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
+                for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
+                    cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size;
+                    cur_buf = buf + i * cluster_size;
+
+                    if ((!block_mig_state.zero_blocks ||
+                        cluster_size < BLOCK_SIZE) &&
+                        buffer_is_zero(cur_buf, cluster_size)) {
+                        ret = blk_pwrite_zeroes(blk, cur_addr,
+                                                cluster_size,
+                                                BDRV_REQ_MAY_UNMAP);
+                    } else {
+                        ret = blk_pwrite(blk, cur_addr, cur_buf,
+                                         cluster_size, 0);
+                    }
+                    if (ret < 0) {
+                        break;
+                    }
+                }
                 g_free(buf);
             }
 
diff --git a/migration/migration.c b/migration/migration.c
index 54060f749a..353f2728cf 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -109,7 +109,6 @@ MigrationState *migrate_get_current(void)
     };
 
     if (!once) {
-        qemu_mutex_init(&current_migration.src_page_req_mutex);
         current_migration.parameters.tls_creds = g_strdup("");
         current_migration.parameters.tls_hostname = g_strdup("");
         once = true;
@@ -349,6 +348,14 @@ static void process_incoming_migration_bh(void *opaque)
         exit(EXIT_FAILURE);
     }
 
+    /* If we get an error here, just don't restart the VM yet. */
+    blk_resume_after_migration(&local_err);
+    if (local_err) {
+        error_free(local_err);
+        local_err = NULL;
+        autostart = false;
+    }
+
     /*
      * This must happen after all error conditions are dealt with and
      * we're sure the VM is going to be running on this host.
@@ -428,9 +435,6 @@ static void process_incoming_migration_co(void *opaque)
         qemu_thread_join(&mis->colo_incoming_thread);
     }
 
-    qemu_fclose(f);
-    free_xbzrle_decoded_buf();
-
     if (ret < 0) {
         migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                           MIGRATION_STATUS_FAILED);
@@ -439,6 +443,9 @@ static void process_incoming_migration_co(void *opaque)
         exit(EXIT_FAILURE);
     }
 
+    qemu_fclose(f);
+    free_xbzrle_decoded_buf();
+
     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
     qemu_bh_schedule(mis->bh);
 }
@@ -643,16 +650,19 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
     info->ram->transferred = ram_bytes_transferred();
     info->ram->total = ram_bytes_total();
     info->ram->duplicate = dup_mig_pages_transferred();
-    info->ram->skipped = skipped_mig_pages_transferred();
+    /* legacy value.  It is not used anymore */
+    info->ram->skipped = 0;
     info->ram->normal = norm_mig_pages_transferred();
-    info->ram->normal_bytes = norm_mig_bytes_transferred();
+    info->ram->normal_bytes = norm_mig_pages_transferred() *
+        qemu_target_page_size();
     info->ram->mbps = s->mbps;
-    info->ram->dirty_sync_count = s->dirty_sync_count;
-    info->ram->postcopy_requests = s->postcopy_requests;
+    info->ram->dirty_sync_count = ram_dirty_sync_count();
+    info->ram->postcopy_requests = ram_postcopy_requests();
+    info->ram->page_size = qemu_target_page_size();
 
     if (s->state != MIGRATION_STATUS_COMPLETED) {
         info->ram->remaining = ram_bytes_remaining();
-        info->ram->dirty_pages_rate = s->dirty_pages_rate;
+        info->ram->dirty_pages_rate = ram_dirty_pages_rate();
     }
 }
 
@@ -947,7 +957,7 @@ static void migrate_fd_cleanup(void *opaque)
     qemu_bh_delete(s->cleanup_bh);
     s->cleanup_bh = NULL;
 
-    flush_page_queue(s);
+    migration_page_queue_free();
 
     if (s->to_dst_file) {
         trace_migrate_fd_cleanup();
@@ -1053,21 +1063,21 @@ bool migration_has_failed(MigrationState *s)
             s->state == MIGRATION_STATUS_FAILED);
 }
 
-bool migration_in_postcopy(MigrationState *s)
+bool migration_in_postcopy(void)
 {
+    MigrationState *s = migrate_get_current();
+
     return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
 }
 
 bool migration_in_postcopy_after_devices(MigrationState *s)
 {
-    return migration_in_postcopy(s) && s->postcopy_after_devices;
+    return migration_in_postcopy() && s->postcopy_after_devices;
 }
 
-bool migration_is_idle(MigrationState *s)
+bool migration_is_idle(void)
 {
-    if (!s) {
-        s = migrate_get_current();
-    }
+    MigrationState *s = migrate_get_current();
 
     switch (s->state) {
     case MIGRATION_STATUS_NONE:
@@ -1108,22 +1118,15 @@ MigrationState *migrate_init(const MigrationParams *params)
     s->mbps = 0.0;
     s->downtime = 0;
     s->expected_downtime = 0;
-    s->dirty_pages_rate = 0;
-    s->dirty_bytes_rate = 0;
     s->setup_time = 0;
-    s->dirty_sync_count = 0;
     s->start_postcopy = false;
     s->postcopy_after_devices = false;
-    s->postcopy_requests = 0;
     s->migration_thread_running = false;
-    s->last_req_rb = NULL;
     error_free(s->error);
     s->error = NULL;
 
     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
 
-    QSIMPLEQ_INIT(&s->src_page_requests);
-
     s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     return s;
 }
@@ -1139,7 +1142,7 @@ int migrate_add_blocker(Error *reason, Error **errp)
         return -EACCES;
     }
 
-    if (migration_is_idle(NULL)) {
+    if (migration_is_idle()) {
         migration_blockers = g_slist_prepend(migration_blockers, reason);
         return 0;
     }
@@ -1477,7 +1480,7 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
         return;
     }
 
-    if (ram_save_queue_pages(ms, rbname, start, len)) {
+    if (ram_save_queue_pages(rbname, start, len)) {
         mark_source_rp_bad(ms);
     }
 }
@@ -1907,7 +1910,12 @@ static void *migration_thread(void *opaque)
     int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     int64_t initial_bytes = 0;
-    int64_t max_size = 0;
+    /*
+     * The final stage happens when the remaining data is smaller than
+     * this threshold; it's calculated from the requested downtime and
+     * measured bandwidth
+     */
+    int64_t threshold_size = 0;
     int64_t start_time = initial_time;
     int64_t end_time;
     bool old_vm_running = false;
@@ -1938,7 +1946,6 @@ static void *migration_thread(void *opaque)
     qemu_savevm_state_begin(s->to_dst_file, &s->params);
 
     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
-    current_active_state = MIGRATION_STATUS_ACTIVE;
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                       MIGRATION_STATUS_ACTIVE);
 
@@ -1952,17 +1959,17 @@ static void *migration_thread(void *opaque)
         if (!qemu_file_rate_limit(s->to_dst_file)) {
             uint64_t pend_post, pend_nonpost;
 
-            qemu_savevm_state_pending(s->to_dst_file, max_size, &pend_nonpost,
-                                      &pend_post);
+            qemu_savevm_state_pending(s->to_dst_file, threshold_size,
+                                      &pend_nonpost, &pend_post);
             pending_size = pend_nonpost + pend_post;
-            trace_migrate_pending(pending_size, max_size,
+            trace_migrate_pending(pending_size, threshold_size,
                                   pend_post, pend_nonpost);
-            if (pending_size && pending_size >= max_size) {
+            if (pending_size && pending_size >= threshold_size) {
                 /* Still a significant amount to transfer */
 
                 if (migrate_postcopy_ram() &&
                     s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
-                    pend_nonpost <= max_size &&
+                    pend_nonpost <= threshold_size &&
                     atomic_read(&s->start_postcopy)) {
 
                     if (!postcopy_start(s, &old_vm_running)) {
@@ -1994,17 +2001,18 @@ static void *migration_thread(void *opaque)
                                          initial_bytes;
             uint64_t time_spent = current_time - initial_time;
             double bandwidth = (double)transferred_bytes / time_spent;
-            max_size = bandwidth * s->parameters.downtime_limit;
+            threshold_size = bandwidth * s->parameters.downtime_limit;
 
             s->mbps = (((double) transferred_bytes * 8.0) /
                     ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
 
             trace_migrate_transferred(transferred_bytes, time_spent,
-                                      bandwidth, max_size);
+                                      bandwidth, threshold_size);
             /* if we haven't sent anything, we don't want to recalculate
                10000 is a small enough number for our purposes */
-            if (s->dirty_bytes_rate && transferred_bytes > 10000) {
-                s->expected_downtime = s->dirty_bytes_rate / bandwidth;
+            if (ram_dirty_pages_rate() && transferred_bytes > 10000) {
+                s->expected_downtime = ram_dirty_pages_rate() *
+                    qemu_target_page_size() / bandwidth;
             }
 
             qemu_file_reset_rate_limit(s->to_dst_file);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index dc80dbb67f..85fd8d72b3 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -123,7 +123,7 @@ bool postcopy_ram_supported_by_host(void)
     struct uffdio_range range_struct;
     uint64_t feature_mask;
 
-    if ((1ul << qemu_target_page_bits()) > pagesize) {
+    if (qemu_target_page_size() > pagesize) {
         error_report("Target page size bigger than host page size");
         goto out;
     }
@@ -213,8 +213,6 @@ out:
 static int init_range(const char *block_name, void *host_addr,
                       ram_addr_t offset, ram_addr_t length, void *opaque)
 {
-    MigrationIncomingState *mis = opaque;
-
     trace_postcopy_init_range(block_name, host_addr, offset, length);
 
     /*
@@ -223,7 +221,7 @@ static int init_range(const char *block_name, void *host_addr,
      * - we're going to get the copy from the source anyway.
      * (Precopy will just overwrite this data, so doesn't need the discard)
      */
-    if (ram_discard_range(mis, block_name, 0, length)) {
+    if (ram_discard_range(block_name, 0, length)) {
         return -1;
     }
 
@@ -271,7 +269,7 @@ static int cleanup_range(const char *block_name, void *host_addr,
  */
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 {
-    if (qemu_ram_foreach_block(init_range, mis)) {
+    if (qemu_ram_foreach_block(init_range, NULL)) {
         return -1;
     }
 
@@ -745,10 +743,10 @@ PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
                                 unsigned long start, unsigned long length)
 {
-    size_t tp_bits = qemu_target_page_bits();
+    size_t tp_size = qemu_target_page_size();
     /* Convert to byte offsets within the RAM block */
-    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
-    pds->length_list[pds->cur_entry] = length << tp_bits;
+    pds->start_list[pds->cur_entry] = (start - pds->offset) * tp_size;
+    pds->length_list[pds->cur_entry] = length * tp_size;
     trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
     pds->cur_entry++;
     pds->nsentwords++;
diff --git a/migration/ram.c b/migration/ram.c
index de1e0a3b18..f48664ec62 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,10 +45,6 @@
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 
-static int dirty_rate_high_cnt;
-
-static uint64_t bitmap_sync_count;
-
 /***********************************************************/
 /* ram save/restore */
 
@@ -96,11 +92,17 @@ static void XBZRLE_cache_unlock(void)
         qemu_mutex_unlock(&XBZRLE.lock);
 }
 
-/*
- * called from qmp_migrate_set_cache_size in main thread, possibly while
- * a migration is in progress.
- * A running migration maybe using the cache and might finish during this
- * call, hence changes to the cache are protected by XBZRLE.lock().
+/**
+ * xbzrle_cache_resize: resize the xbzrle cache
+ *
+ * This function is called from qmp_migrate_set_cache_size in main
+ * thread, possibly while a migration is in progress.  A running
+ * migration may be using the cache and might finish during this call,
+ * hence changes to the cache are protected by XBZRLE.lock().
+ *
+ * Returns the new_size or negative in case of error.
+ *
+ * @new_size: new cache size
  */
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
@@ -136,115 +138,171 @@ out:
     return ret;
 }
 
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
-    uint64_t dup_pages;
-    uint64_t skipped_pages;
+struct RAMBitmap {
+    struct rcu_head rcu;
+    /* Main migration bitmap */
+    unsigned long *bmap;
+    /* bitmap of pages that haven't been sent even once
+     * only maintained and used in postcopy at the moment
+     * where it's used to send the dirtymap at the start
+     * of the postcopy phase
+     */
+    unsigned long *unsentmap;
+};
+typedef struct RAMBitmap RAMBitmap;
+
+/*
+ * An outstanding page request, on the source, having been received
+ * and queued
+ */
+struct RAMSrcPageRequest {
+    RAMBlock *rb;
+    hwaddr    offset;
+    hwaddr    len;
+
+    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
+};
+
+/* State of RAM for migration */
+struct RAMState {
+    /* QEMUFile used for this migration */
+    QEMUFile *f;
+    /* Last block that we have visited searching for dirty pages */
+    RAMBlock *last_seen_block;
+    /* Last block from where we have sent data */
+    RAMBlock *last_sent_block;
+    /* Last dirty target page we have sent */
+    ram_addr_t last_page;
+    /* last ram version we have seen */
+    uint32_t last_version;
+    /* We are in the first round */
+    bool ram_bulk_stage;
+    /* How many times we have dirty too many pages */
+    int dirty_rate_high_cnt;
+    /* How many times we have synchronized the bitmap */
+    uint64_t bitmap_sync_count;
+    /* these variables are used for bitmap sync */
+    /* last time we did a full bitmap_sync */
+    int64_t time_last_bitmap_sync;
+    /* bytes transferred at start_time */
+    uint64_t bytes_xfer_prev;
+    /* number of dirty pages since start_time */
+    uint64_t num_dirty_pages_period;
+    /* xbzrle misses since the beginning of the period */
+    uint64_t xbzrle_cache_miss_prev;
+    /* number of iterations at the beginning of period */
+    uint64_t iterations_prev;
+    /* Accounting fields */
+    /* number of zero pages.  It used to be pages filled by the same char. */
+    uint64_t zero_pages;
+    /* number of normal transferred pages */
     uint64_t norm_pages;
+    /* Iterations since start */
     uint64_t iterations;
+    /* xbzrle transmitted bytes.  Notice that this is with
+     * compression, they can't be calculated from the pages */
     uint64_t xbzrle_bytes;
+    /* xbzrle transmmited pages */
     uint64_t xbzrle_pages;
+    /* xbzrle number of cache miss */
     uint64_t xbzrle_cache_miss;
+    /* xbzrle miss rate */
     double xbzrle_cache_miss_rate;
+    /* xbzrle number of overflows */
     uint64_t xbzrle_overflows;
-} AccountingInfo;
+    /* number of dirty bits in the bitmap */
+    uint64_t migration_dirty_pages;
+    /* total number of bytes transferred */
+    uint64_t bytes_transferred;
+    /* number of dirtied pages in the last second */
+    uint64_t dirty_pages_rate;
+    /* Count of requests incoming from destination */
+    uint64_t postcopy_requests;
+    /* protects modification of the bitmap */
+    QemuMutex bitmap_mutex;
+    /* Ram Bitmap protected by RCU */
+    RAMBitmap *ram_bitmap;
+    /* The RAMBlock used in the last src_page_requests */
+    RAMBlock *last_req_rb;
+    /* Queue of outstanding page requests from the destination */
+    QemuMutex src_page_req_mutex;
+    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
+};
+typedef struct RAMState RAMState;
 
-static AccountingInfo acct_info;
+static RAMState ram_state;
 
-static void acct_clear(void)
+uint64_t dup_mig_pages_transferred(void)
 {
-    memset(&acct_info, 0, sizeof(acct_info));
+    return ram_state.zero_pages;
 }
 
-uint64_t dup_mig_bytes_transferred(void)
+uint64_t norm_mig_pages_transferred(void)
 {
-    return acct_info.dup_pages * TARGET_PAGE_SIZE;
+    return ram_state.norm_pages;
 }
 
-uint64_t dup_mig_pages_transferred(void)
+uint64_t xbzrle_mig_bytes_transferred(void)
 {
-    return acct_info.dup_pages;
+    return ram_state.xbzrle_bytes;
 }
 
-uint64_t skipped_mig_bytes_transferred(void)
+uint64_t xbzrle_mig_pages_transferred(void)
 {
-    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
+    return ram_state.xbzrle_pages;
 }
 
-uint64_t skipped_mig_pages_transferred(void)
+uint64_t xbzrle_mig_pages_cache_miss(void)
 {
-    return acct_info.skipped_pages;
+    return ram_state.xbzrle_cache_miss;
 }
 
-uint64_t norm_mig_bytes_transferred(void)
+double xbzrle_mig_cache_miss_rate(void)
 {
-    return acct_info.norm_pages * TARGET_PAGE_SIZE;
+    return ram_state.xbzrle_cache_miss_rate;
 }
 
-uint64_t norm_mig_pages_transferred(void)
+uint64_t xbzrle_mig_pages_overflow(void)
 {
-    return acct_info.norm_pages;
+    return ram_state.xbzrle_overflows;
 }
 
-uint64_t xbzrle_mig_bytes_transferred(void)
+uint64_t ram_bytes_transferred(void)
 {
-    return acct_info.xbzrle_bytes;
+    return ram_state.bytes_transferred;
 }
 
-uint64_t xbzrle_mig_pages_transferred(void)
+uint64_t ram_bytes_remaining(void)
 {
-    return acct_info.xbzrle_pages;
+    return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
 }
 
-uint64_t xbzrle_mig_pages_cache_miss(void)
+uint64_t ram_dirty_sync_count(void)
 {
-    return acct_info.xbzrle_cache_miss;
+    return ram_state.bitmap_sync_count;
 }
 
-double xbzrle_mig_cache_miss_rate(void)
+uint64_t ram_dirty_pages_rate(void)
 {
-    return acct_info.xbzrle_cache_miss_rate;
+    return ram_state.dirty_pages_rate;
 }
 
-uint64_t xbzrle_mig_pages_overflow(void)
+uint64_t ram_postcopy_requests(void)
 {
-    return acct_info.xbzrle_overflows;
+    return ram_state.postcopy_requests;
 }
 
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static QemuMutex migration_bitmap_mutex;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 /* used by the search for pages to send */
 struct PageSearchStatus {
     /* Current block being searched */
     RAMBlock    *block;
-    /* Current offset to search from */
-    ram_addr_t   offset;
+    /* Current page to search from */
+    unsigned long page;
     /* Set once we wrap around */
     bool         complete_round;
 };
 typedef struct PageSearchStatus PageSearchStatus;
 
-static struct BitmapRcu {
-    struct rcu_head rcu;
-    /* Main migration bitmap */
-    unsigned long *bmap;
-    /* bitmap of pages that haven't been sent even once
-     * only maintained and used in postcopy at the moment
-     * where it's used to send the dirtymap at the start
-     * of the postcopy phase
-     */
-    unsigned long *unsentmap;
-} *migration_bitmap_rcu;
-
 struct CompressParam {
     bool done;
     bool quit;
@@ -278,7 +336,6 @@ static QemuCond comp_done_cond;
 /* The empty QEMUFileOps will be used by file in CompressParam */
 static const QEMUFileOps empty_ops = { };
 
-static bool compression_switch;
 static DecompressParam *decomp_param;
 static QemuThread *decompress_threads;
 static QemuMutex decomp_done_lock;
@@ -323,6 +380,7 @@ static inline void terminate_compression_threads(void)
     int idx, thread_count;
 
     thread_count = migrate_compress_threads();
+
     for (idx = 0; idx < thread_count; idx++) {
         qemu_mutex_lock(&comp_param[idx].mutex);
         comp_param[idx].quit = true;
@@ -361,7 +419,6 @@ void migrate_compress_threads_create(void)
     if (!migrate_use_compression()) {
         return;
     }
-    compression_switch = true;
     thread_count = migrate_compress_threads();
     compress_threads = g_new0(QemuThread, thread_count);
     comp_param = g_new0(CompressParam, thread_count);
@@ -383,38 +440,45 @@ void migrate_compress_threads_create(void)
 }
 
 /**
- * save_page_header: Write page header to wire
+ * save_page_header: write page header to wire
  *
  * If this is the 1st block, it also writes the block identification
  *
- * Returns: Number of bytes written
+ * Returns the number of bytes written
  *
  * @f: QEMUFile where to send the data
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  *          in the lower bits, it contains flags
  */
-static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset)
 {
     size_t size, len;
 
-    qemu_put_be64(f, offset);
+    if (block == rs->last_sent_block) {
+        offset |= RAM_SAVE_FLAG_CONTINUE;
+    }
+    qemu_put_be64(rs->f, offset);
     size = 8;
 
     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
         len = strlen(block->idstr);
-        qemu_put_byte(f, len);
-        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
+        qemu_put_byte(rs->f, len);
+        qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len);
         size += 1 + len;
+        rs->last_sent_block = block;
     }
     return size;
 }
 
-/* Reduce amount of guest cpu execution to hopefully slow down memory writes.
- * If guest dirty memory rate is reduced below the rate at which we can
- * transfer pages to the destination then we should be able to complete
- * migration. Some workloads dirty memory way too fast and will not effectively
- * converge, even with auto-converge.
+/**
+ * mig_throttle_guest_down: throotle down the guest
+ *
+ * Reduce amount of guest cpu execution to hopefully slow down memory
+ * writes. If guest dirty memory rate is reduced below the rate at
+ * which we can transfer pages to the destination then we should be
+ * able to complete migration. Some workloads dirty memory way too
+ * fast and will not effectively converge, even with auto-converge.
  */
 static void mig_throttle_guest_down(void)
 {
@@ -431,22 +495,28 @@ static void mig_throttle_guest_down(void)
     }
 }
 
-/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+/**
+ * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
+ *
+ * @rs: current RAM state
+ * @current_addr: address for the zero page
+ *
+ * Update the xbzrle cache to reflect a page that's been sent as all 0.
  * The important thing is that a stale (not-yet-0'd) page be replaced
  * by the new data.
  * As a bonus, if the page wasn't in the cache it gets added so that
- * when a small write is made into the 0'd page it gets XBZRLE sent
+ * when a small write is made into the 0'd page it gets XBZRLE sent.
  */
-static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
 {
-    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
         return;
     }
 
     /* We don't care if this fails to allocate a new cache page
      * as long as it updated an old one */
     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
-                 bitmap_sync_count);
+                 rs->bitmap_sync_count);
 }
 
 #define ENCODING_FLAG_XBZRLE 0x1
@@ -458,27 +528,25 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr)
  *          0 means that page is identical to the one already sent
  *          -1 means that xbzrle would be longer than normal
  *
- * @f: QEMUFile where to send the data
- * @current_data:
- * @current_addr:
+ * @rs: current RAM state
+ * @current_data: pointer to the address of the page contents
+ * @current_addr: addr of the page
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
  */
-static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
+static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                             ram_addr_t current_addr, RAMBlock *block,
-                            ram_addr_t offset, bool last_stage,
-                            uint64_t *bytes_transferred)
+                            ram_addr_t offset, bool last_stage)
 {
     int encoded_len = 0, bytes_xbzrle;
     uint8_t *prev_cached_page;
 
-    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
-        acct_info.xbzrle_cache_miss++;
+    if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
+        rs->xbzrle_cache_miss++;
         if (!last_stage) {
             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
-                             bitmap_sync_count) == -1) {
+                             rs->bitmap_sync_count) == -1) {
                 return -1;
             } else {
                 /* update *current_data when the page has been
@@ -503,7 +571,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
         return 0;
     } else if (encoded_len == -1) {
         trace_save_xbzrle_page_overflow();
-        acct_info.xbzrle_overflows++;
+        rs->xbzrle_overflows++;
         /* update data in the cache */
         if (!last_stage) {
             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
@@ -518,92 +586,86 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
     }
 
     /* Send XBZRLE based compressed page */
-    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
-    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
-    qemu_put_be16(f, encoded_len);
-    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_xbzrle = save_page_header(rs, block,
+                                    offset | RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
+    qemu_put_be16(rs->f, encoded_len);
+    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
     bytes_xbzrle += encoded_len + 1 + 2;
-    acct_info.xbzrle_pages++;
-    acct_info.xbzrle_bytes += bytes_xbzrle;
-    *bytes_transferred += bytes_xbzrle;
+    rs->xbzrle_pages++;
+    rs->xbzrle_bytes += bytes_xbzrle;
+    rs->bytes_transferred += bytes_xbzrle;
 
     return 1;
 }
 
-/* Called with rcu_read_lock() to protect migration_bitmap
- * rb: The RAMBlock  to search for dirty pages in
- * start: Start address (typically so we can continue from previous page)
- * ram_addr_abs: Pointer into which to store the address of the dirty page
- *               within the global ram_addr space
+/**
+ * migration_bitmap_find_dirty: find the next dirty page from start
+ *
+ * Called with rcu_read_lock() to protect migration_bitmap
  *
- * Returns: byte offset within memory region of the start of a dirty page
+ * Returns the byte offset within memory region of the start of a dirty page
+ *
+ * @rs: current RAM state
+ * @rb: RAMBlock where to search for dirty pages
+ * @start: page where we start the search
  */
 static inline
-ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
-                                       ram_addr_t start,
-                                       ram_addr_t *ram_addr_abs)
+unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
+                                          unsigned long start)
 {
     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
-    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    unsigned long nr = base + start;
     uint64_t rb_size = rb->used_length;
     unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
     unsigned long *bitmap;
 
     unsigned long next;
 
-    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    if (ram_bulk_stage && nr > base) {
+    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+    if (rs->ram_bulk_stage && nr > base) {
         next = nr + 1;
     } else {
         next = find_next_bit(bitmap, size, nr);
     }
 
-    *ram_addr_abs = next << TARGET_PAGE_BITS;
-    return (next - base) << TARGET_PAGE_BITS;
+    return next - base;
 }
 
-static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
+static inline bool migration_bitmap_clear_dirty(RAMState *rs,
+                                                RAMBlock *rb,
+                                                unsigned long page)
 {
     bool ret;
-    int nr = addr >> TARGET_PAGE_BITS;
-    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+    unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+    unsigned long nr = (rb->offset >> TARGET_PAGE_BITS) + page;
 
     ret = test_and_clear_bit(nr, bitmap);
 
     if (ret) {
-        migration_dirty_pages--;
+        rs->migration_dirty_pages--;
     }
     return ret;
 }
 
-static int64_t num_dirty_pages_period;
-static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
+                                        ram_addr_t start, ram_addr_t length)
 {
     unsigned long *bitmap;
-    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
-                             start, length, &num_dirty_pages_period);
-}
-
-/* Fix me: there are too many global variables used in migration process. */
-static int64_t start_time;
-static int64_t bytes_xfer_prev;
-static uint64_t xbzrle_cache_miss_prev;
-static uint64_t iterations_prev;
-
-static void migration_bitmap_sync_init(void)
-{
-    start_time = 0;
-    bytes_xfer_prev = 0;
-    num_dirty_pages_period = 0;
-    xbzrle_cache_miss_prev = 0;
-    iterations_prev = 0;
+    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+    rs->migration_dirty_pages +=
+        cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length,
+                                              &rs->num_dirty_pages_period);
 }
 
-/* Returns a summary bitmap of the page sizes of all RAMBlocks;
- * for VMs with just normal pages this is equivalent to the
- * host page size.  If it's got some huge pages then it's the OR
- * of all the different page sizes.
+/**
+ * ram_pagesize_summary: calculate all the pagesizes of a VM
+ *
+ * Returns a summary bitmap of the page sizes of all RAMBlocks
+ *
+ * For VMs with just normal pages this is equivalent to the host page
+ * size. If it's got some huge pages then it's the OR of all the
+ * different page sizes.
  */
 uint64_t ram_pagesize_summary(void)
 {
@@ -617,40 +679,39 @@ uint64_t ram_pagesize_summary(void)
     return summary;
 }
 
-static void migration_bitmap_sync(void)
+static void migration_bitmap_sync(RAMState *rs)
 {
     RAMBlock *block;
-    MigrationState *s = migrate_get_current();
     int64_t end_time;
-    int64_t bytes_xfer_now;
+    uint64_t bytes_xfer_now;
 
-    bitmap_sync_count++;
+    rs->bitmap_sync_count++;
 
-    if (!bytes_xfer_prev) {
-        bytes_xfer_prev = ram_bytes_transferred();
+    if (!rs->bytes_xfer_prev) {
+        rs->bytes_xfer_prev = ram_bytes_transferred();
     }
 
-    if (!start_time) {
-        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    if (!rs->time_last_bitmap_sync) {
+        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     }
 
     trace_migration_bitmap_sync_start();
     memory_global_dirty_log_sync();
 
-    qemu_mutex_lock(&migration_bitmap_mutex);
+    qemu_mutex_lock(&rs->bitmap_mutex);
     rcu_read_lock();
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
-        migration_bitmap_sync_range(block->offset, block->used_length);
+        migration_bitmap_sync_range(rs, block, 0, block->used_length);
     }
     rcu_read_unlock();
-    qemu_mutex_unlock(&migration_bitmap_mutex);
+    qemu_mutex_unlock(&rs->bitmap_mutex);
 
-    trace_migration_bitmap_sync_end(num_dirty_pages_period);
+    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
 
     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 
     /* more than 1 second = 1000 millisecons */
-    if (end_time > start_time + 1000) {
+    if (end_time > rs->time_last_bitmap_sync + 1000) {
         if (migrate_auto_converge()) {
             /* The following detection logic can be refined later. For now:
                Check to see if the dirtied bytes is 50% more than the approx.
@@ -659,94 +720,87 @@ static void migration_bitmap_sync(void)
                throttling */
             bytes_xfer_now = ram_bytes_transferred();
 
-            if (s->dirty_pages_rate &&
-               (num_dirty_pages_period * TARGET_PAGE_SIZE >
-                   (bytes_xfer_now - bytes_xfer_prev)/2) &&
-               (dirty_rate_high_cnt++ >= 2)) {
+            if (rs->dirty_pages_rate &&
+               (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
+                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
+               (rs->dirty_rate_high_cnt++ >= 2)) {
                     trace_migration_throttle();
-                    dirty_rate_high_cnt = 0;
+                    rs->dirty_rate_high_cnt = 0;
                     mig_throttle_guest_down();
              }
-             bytes_xfer_prev = bytes_xfer_now;
+             rs->bytes_xfer_prev = bytes_xfer_now;
         }
 
         if (migrate_use_xbzrle()) {
-            if (iterations_prev != acct_info.iterations) {
-                acct_info.xbzrle_cache_miss_rate =
-                   (double)(acct_info.xbzrle_cache_miss -
-                            xbzrle_cache_miss_prev) /
-                   (acct_info.iterations - iterations_prev);
+            if (rs->iterations_prev != rs->iterations) {
+                rs->xbzrle_cache_miss_rate =
+                   (double)(rs->xbzrle_cache_miss -
+                            rs->xbzrle_cache_miss_prev) /
+                   (rs->iterations - rs->iterations_prev);
             }
-            iterations_prev = acct_info.iterations;
-            xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
+            rs->iterations_prev = rs->iterations;
+            rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
         }
-        s->dirty_pages_rate = num_dirty_pages_period * 1000
-            / (end_time - start_time);
-        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
-        start_time = end_time;
-        num_dirty_pages_period = 0;
+        rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
+            / (end_time - rs->time_last_bitmap_sync);
+        rs->time_last_bitmap_sync = end_time;
+        rs->num_dirty_pages_period = 0;
     }
-    s->dirty_sync_count = bitmap_sync_count;
     if (migrate_use_events()) {
-        qapi_event_send_migration_pass(bitmap_sync_count, NULL);
+        qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
     }
 }
 
 /**
- * save_zero_page: Send the zero page to the stream
+ * save_zero_page: send the zero page to the stream
  *
- * Returns: Number of pages written.
+ * Returns the number of pages written.
  *
- * @f: QEMUFile where to send the data
+ * @rs: current RAM state
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  * @p: pointer to the page
- * @bytes_transferred: increase it with the number of transferred bytes
  */
-static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-                          uint8_t *p, uint64_t *bytes_transferred)
+static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
+                          uint8_t *p)
 {
     int pages = -1;
 
     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
-        acct_info.dup_pages++;
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_COMPRESS);
-        qemu_put_byte(f, 0);
-        *bytes_transferred += 1;
+        rs->zero_pages++;
+        rs->bytes_transferred +=
+            save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS);
+        qemu_put_byte(rs->f, 0);
+        rs->bytes_transferred += 1;
         pages = 1;
     }
 
     return pages;
 }
 
-static void ram_release_pages(MigrationState *ms, const char *block_name,
-                              uint64_t offset, int pages)
+static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
 {
-    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
+    if (!migrate_release_ram() || !migration_in_postcopy()) {
         return;
     }
 
-    ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
+    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
 }
 
 /**
- * ram_save_page: Send the given page to the stream
+ * ram_save_page: send the given page to the stream
  *
- * Returns: Number of pages written.
+ * Returns the number of pages written.
  *          < 0 - error
  *          >=0 - Number of pages written - this might legally be 0
  *                if xbzrle noticed the page was the same.
  *
- * @ms: The current migration state.
- * @f: QEMUFile where to send the data
+ * @rs: current RAM state
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
  */
-static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
-                         bool last_stage, uint64_t *bytes_transferred)
+static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 {
     int pages = -1;
     uint64_t bytes_xmit;
@@ -755,16 +809,16 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
     int ret;
     bool send_async = true;
     RAMBlock *block = pss->block;
-    ram_addr_t offset = pss->offset;
+    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 
     p = block->host + offset;
 
     /* In doubt sent page as normal */
     bytes_xmit = 0;
-    ret = ram_control_save_page(f, block->offset,
+    ret = ram_control_save_page(rs->f, block->offset,
                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
     if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
+        rs->bytes_transferred += bytes_xmit;
         pages = 1;
     }
 
@@ -772,29 +826,26 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
 
     current_addr = block->offset + offset;
 
-    if (block == last_sent_block) {
-        offset |= RAM_SAVE_FLAG_CONTINUE;
-    }
     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
         if (ret != RAM_SAVE_CONTROL_DELAYED) {
             if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
+                rs->norm_pages++;
             } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
+                rs->zero_pages++;
             }
         }
     } else {
-        pages = save_zero_page(f, block, offset, p, bytes_transferred);
+        pages = save_zero_page(rs, block, offset, p);
         if (pages > 0) {
             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
              * page would be stale
              */
-            xbzrle_cache_zero_page(current_addr);
-            ram_release_pages(ms, block->idstr, pss->offset, pages);
-        } else if (!ram_bulk_stage &&
-                   !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
-            pages = save_xbzrle_page(f, &p, current_addr, block,
-                                     offset, last_stage, bytes_transferred);
+            xbzrle_cache_zero_page(rs, current_addr);
+            ram_release_pages(block->idstr, offset, pages);
+        } else if (!rs->ram_bulk_stage &&
+                   !migration_in_postcopy() && migrate_use_xbzrle()) {
+            pages = save_xbzrle_page(rs, &p, current_addr, block,
+                                     offset, last_stage);
             if (!last_stage) {
                 /* Can't send this cached data async, since the cache page
                  * might get updated before it gets to the wire
@@ -806,18 +857,18 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
 
     /* XBZRLE overflow or normal page */
     if (pages == -1) {
-        *bytes_transferred += save_page_header(f, block,
-                                               offset | RAM_SAVE_FLAG_PAGE);
+        rs->bytes_transferred += save_page_header(rs, block,
+                                                  offset | RAM_SAVE_FLAG_PAGE);
         if (send_async) {
-            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
+            qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
                                   migrate_release_ram() &
-                                  migration_in_postcopy(ms));
+                                  migration_in_postcopy());
         } else {
-            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+            qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
         }
-        *bytes_transferred += TARGET_PAGE_SIZE;
+        rs->bytes_transferred += TARGET_PAGE_SIZE;
         pages = 1;
-        acct_info.norm_pages++;
+        rs->norm_pages++;
     }
 
     XBZRLE_cache_unlock();
@@ -828,10 +879,11 @@ static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                 ram_addr_t offset)
 {
+    RAMState *rs = &ram_state;
     int bytes_sent, blen;
     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
 
-    bytes_sent = save_page_header(f, block, offset |
+    bytes_sent = save_page_header(rs, block, offset |
                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
                                      migrate_compress_level());
@@ -841,16 +893,13 @@ static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
         error_report("compressed data failed!");
     } else {
         bytes_sent += blen;
-        ram_release_pages(migrate_get_current(), block->idstr,
-                          offset & TARGET_PAGE_MASK, 1);
+        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
     }
 
     return bytes_sent;
 }
 
-static uint64_t bytes_transferred;
-
-static void flush_compressed_data(QEMUFile *f)
+static void flush_compressed_data(RAMState *rs)
 {
     int idx, len, thread_count;
 
@@ -870,8 +919,8 @@ static void flush_compressed_data(QEMUFile *f)
     for (idx = 0; idx < thread_count; idx++) {
         qemu_mutex_lock(&comp_param[idx].mutex);
         if (!comp_param[idx].quit) {
-            len = qemu_put_qemu_file(f, comp_param[idx].file);
-            bytes_transferred += len;
+            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
+            rs->bytes_transferred += len;
         }
         qemu_mutex_unlock(&comp_param[idx].mutex);
     }
@@ -884,9 +933,8 @@ static inline void set_compress_params(CompressParam *param, RAMBlock *block,
     param->offset = offset;
 }
 
-static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
-                                           ram_addr_t offset,
-                                           uint64_t *bytes_transferred)
+static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
+                                           ram_addr_t offset)
 {
     int idx, thread_count, bytes_xmit = -1, pages = -1;
 
@@ -896,14 +944,14 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
         for (idx = 0; idx < thread_count; idx++) {
             if (comp_param[idx].done) {
                 comp_param[idx].done = false;
-                bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
+                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                 qemu_mutex_lock(&comp_param[idx].mutex);
                 set_compress_params(&comp_param[idx], block, offset);
                 qemu_cond_signal(&comp_param[idx].cond);
                 qemu_mutex_unlock(&comp_param[idx].mutex);
                 pages = 1;
-                acct_info.norm_pages++;
-                *bytes_transferred += bytes_xmit;
+                rs->norm_pages++;
+                rs->bytes_transferred += bytes_xmit;
                 break;
             }
         }
@@ -921,40 +969,37 @@ static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
 /**
  * ram_save_compressed_page: compress the given page and send it to the stream
  *
- * Returns: Number of pages written.
+ * Returns the number of pages written.
  *
- * @ms: The current migration state.
- * @f: QEMUFile where to send the data
+ * @rs: current RAM state
  * @block: block that contains the page we want to send
  * @offset: offset inside the block for the page
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
  */
-static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
-                                    PageSearchStatus *pss, bool last_stage,
-                                    uint64_t *bytes_transferred)
+static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
+                                    bool last_stage)
 {
     int pages = -1;
     uint64_t bytes_xmit = 0;
     uint8_t *p;
     int ret, blen;
     RAMBlock *block = pss->block;
-    ram_addr_t offset = pss->offset;
+    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 
     p = block->host + offset;
 
-    ret = ram_control_save_page(f, block->offset,
+    ret = ram_control_save_page(rs->f, block->offset,
                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
     if (bytes_xmit) {
-        *bytes_transferred += bytes_xmit;
+        rs->bytes_transferred += bytes_xmit;
         pages = 1;
     }
     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
         if (ret != RAM_SAVE_CONTROL_DELAYED) {
             if (bytes_xmit > 0) {
-                acct_info.norm_pages++;
+                rs->norm_pages++;
             } else if (bytes_xmit == 0) {
-                acct_info.dup_pages++;
+                rs->zero_pages++;
             }
         }
     } else {
@@ -964,35 +1009,33 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
          * out, keeping this order is important, because the 'cont' flag
          * is used to avoid resending the block name.
          */
-        if (block != last_sent_block) {
-            flush_compressed_data(f);
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+        if (block != rs->last_sent_block) {
+            flush_compressed_data(rs);
+            pages = save_zero_page(rs, block, offset, p);
             if (pages == -1) {
                 /* Make sure the first page is sent out before other pages */
-                bytes_xmit = save_page_header(f, block, offset |
+                bytes_xmit = save_page_header(rs, block, offset |
                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
-                blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
+                blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
                                                  migrate_compress_level());
                 if (blen > 0) {
-                    *bytes_transferred += bytes_xmit + blen;
-                    acct_info.norm_pages++;
+                    rs->bytes_transferred += bytes_xmit + blen;
+                    rs->norm_pages++;
                     pages = 1;
                 } else {
-                    qemu_file_set_error(f, blen);
+                    qemu_file_set_error(rs->f, blen);
                     error_report("compressed data failed!");
                 }
             }
             if (pages > 0) {
-                ram_release_pages(ms, block->idstr, pss->offset, pages);
+                ram_release_pages(block->idstr, offset, pages);
             }
         } else {
-            offset |= RAM_SAVE_FLAG_CONTINUE;
-            pages = save_zero_page(f, block, offset, p, bytes_transferred);
+            pages = save_zero_page(rs, block, offset, p);
             if (pages == -1) {
-                pages = compress_page_with_multi_thread(f, block, offset,
-                                                        bytes_transferred);
+                pages = compress_page_with_multi_thread(rs, block, offset);
             } else {
-                ram_release_pages(ms, block->idstr, pss->offset, pages);
+                ram_release_pages(block->idstr, offset, pages);
             }
         }
     }
@@ -1000,25 +1043,21 @@ static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
     return pages;
 }
 
-/*
- * Find the next dirty page and update any state associated with
- * the search process.
+/**
+ * find_dirty_block: find the next dirty page and update any state
+ * associated with the search process.
  *
- * Returns: True if a page is found
+ * Returns if a page is found
  *
- * @f: Current migration stream.
- * @pss: Data about the state of the current dirty page scan.
- * @*again: Set to false if the search has scanned the whole of RAM
- * *ram_addr_abs: Pointer into which to store the address of the dirty page
- *               within the global ram_addr space
+ * @rs: current RAM state
+ * @pss: data about the state of the current dirty page scan
+ * @again: set to false if the search has scanned the whole of RAM
  */
-static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
-                             bool *again, ram_addr_t *ram_addr_abs)
+static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
 {
-    pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
-                                              ram_addr_abs);
-    if (pss->complete_round && pss->block == last_seen_block &&
-        pss->offset >= last_offset) {
+    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
+    if (pss->complete_round && pss->block == rs->last_seen_block &&
+        pss->page >= rs->last_page) {
         /*
          * We've been once around the RAM and haven't found anything.
          * Give up.
@@ -1026,22 +1065,21 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
         *again = false;
         return false;
     }
-    if (pss->offset >= pss->block->used_length) {
+    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
         /* Didn't find anything in this RAM Block */
-        pss->offset = 0;
+        pss->page = 0;
         pss->block = QLIST_NEXT_RCU(pss->block, next);
         if (!pss->block) {
             /* Hit the end of the list */
             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
             /* Flag that we've looped */
             pss->complete_round = true;
-            ram_bulk_stage = false;
+            rs->ram_bulk_stage = false;
             if (migrate_use_xbzrle()) {
                 /* If xbzrle is on, stop using the data compression at this
                  * point. In theory, xbzrle can do better than compression.
                  */
-                flush_compressed_data(f);
-                compression_switch = false;
+                flush_compressed_data(rs);
             }
         }
         /* Didn't find anything this time, but try again on the new block */
@@ -1055,61 +1093,59 @@ static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
     }
 }
 
-/*
+/**
+ * unqueue_page: gets a page of the queue
+ *
  * Helper for 'get_queued_page' - gets a page off the queue
- *      ms:      MigrationState in
- * *offset:      Used to return the offset within the RAMBlock
- * ram_addr_abs: global offset in the dirty/sent bitmaps
  *
- * Returns:      block (or NULL if none available)
+ * Returns the block of the page (or NULL if none available)
+ *
+ * @rs: current RAM state
+ * @offset: used to return the offset within the RAMBlock
  */
-static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
-                              ram_addr_t *ram_addr_abs)
+static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 {
     RAMBlock *block = NULL;
 
-    qemu_mutex_lock(&ms->src_page_req_mutex);
-    if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
-        struct MigrationSrcPageRequest *entry =
-                                QSIMPLEQ_FIRST(&ms->src_page_requests);
+    qemu_mutex_lock(&rs->src_page_req_mutex);
+    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
+        struct RAMSrcPageRequest *entry =
+                                QSIMPLEQ_FIRST(&rs->src_page_requests);
         block = entry->rb;
         *offset = entry->offset;
-        *ram_addr_abs = (entry->offset + entry->rb->offset) &
-                        TARGET_PAGE_MASK;
 
         if (entry->len > TARGET_PAGE_SIZE) {
             entry->len -= TARGET_PAGE_SIZE;
             entry->offset += TARGET_PAGE_SIZE;
         } else {
             memory_region_unref(block->mr);
-            QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
+            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
             g_free(entry);
         }
     }
-    qemu_mutex_unlock(&ms->src_page_req_mutex);
+    qemu_mutex_unlock(&rs->src_page_req_mutex);
 
     return block;
 }
 
-/*
- * Unqueue a page from the queue fed by postcopy page requests; skips pages
- * that are already sent (!dirty)
+/**
+ * get_queued_page: unqueue a page from the postocpy requests
+ *
+ * Skips pages that are already sent (!dirty)
  *
- *      ms:      MigrationState in
- *     pss:      PageSearchStatus structure updated with found block/offset
- * ram_addr_abs: global offset in the dirty/sent bitmaps
+ * Returns if a queued page is found
  *
- * Returns:      true if a queued page is found
+ * @rs: current RAM state
+ * @pss: data about the state of the current dirty page scan
  */
-static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
-                            ram_addr_t *ram_addr_abs)
+static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock  *block;
     ram_addr_t offset;
     bool dirty;
 
     do {
-        block = unqueue_page(ms, &offset, ram_addr_abs);
+        block = unqueue_page(rs, &offset);
         /*
          * We're sending this page, and since it's postcopy nothing else
          * will dirty it, and we must make sure it doesn't get sent again
@@ -1118,18 +1154,18 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
          */
         if (block) {
             unsigned long *bitmap;
-            bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-            dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
+            unsigned long page;
+
+            bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+            page = (block->offset + offset) >> TARGET_PAGE_BITS;
+            dirty = test_bit(page, bitmap);
             if (!dirty) {
-                trace_get_queued_page_not_dirty(
-                    block->idstr, (uint64_t)offset,
-                    (uint64_t)*ram_addr_abs,
-                    test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
-                         atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
+                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
+                    page,
+                    test_bit(page,
+                             atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
             } else {
-                trace_get_queued_page(block->idstr,
-                                      (uint64_t)offset,
-                                      (uint64_t)*ram_addr_abs);
+                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
             }
         }
 
@@ -1142,7 +1178,7 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
          * in (migration_bitmap_find_and_reset_dirty) that every page is
          * dirty, that's no longer true.
          */
-        ram_bulk_stage = false;
+        rs->ram_bulk_stage = false;
 
         /*
          * We want the background search to continue from the queued page
@@ -1150,52 +1186,58 @@ static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
          * it just requested.
          */
         pss->block = block;
-        pss->offset = offset;
+        pss->page = offset >> TARGET_PAGE_BITS;
     }
 
     return !!block;
 }
 
 /**
- * flush_page_queue: Flush any remaining pages in the ram request queue
- *    it should be empty at the end anyway, but in error cases there may be
- *    some left.
+ * migration_page_queue_free: drop any remaining pages in the ram
+ * request queue
+ *
+ * It should be empty at the end anyway, but in error cases there may
+ * be some left.  in case that there is any page left, we drop it.
  *
- * ms: MigrationState
  */
-void flush_page_queue(MigrationState *ms)
+void migration_page_queue_free(void)
 {
-    struct MigrationSrcPageRequest *mspr, *next_mspr;
+    struct RAMSrcPageRequest *mspr, *next_mspr;
+    RAMState *rs = &ram_state;
     /* This queue generally should be empty - but in the case of a failed
      * migration might have some droppings in.
      */
     rcu_read_lock();
-    QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
+    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
         memory_region_unref(mspr->rb->mr);
-        QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
+        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
         g_free(mspr);
     }
     rcu_read_unlock();
 }
 
 /**
- * Queue the pages for transmission, e.g. a request from postcopy destination
- *   ms: MigrationStatus in which the queue is held
- *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
- *   start: Offset from the start of the RAMBlock
- *   len: Length (in bytes) to send
- *   Return: 0 on success
+ * ram_save_queue_pages: queue the page for transmission
+ *
+ * A request from postcopy destination for example.
+ *
+ * Returns zero on success or negative on error
+ *
+ * @rbname: Name of the RAMBLock of the request. NULL means the
+ *          same that last one.
+ * @start: starting address from the start of the RAMBlock
+ * @len: length (in bytes) to send
  */
-int ram_save_queue_pages(MigrationState *ms, const char *rbname,
-                         ram_addr_t start, ram_addr_t len)
+int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
 {
     RAMBlock *ramblock;
+    RAMState *rs = &ram_state;
 
-    ms->postcopy_requests++;
+    rs->postcopy_requests++;
     rcu_read_lock();
     if (!rbname) {
         /* Reuse last RAMBlock */
-        ramblock = ms->last_req_rb;
+        ramblock = rs->last_req_rb;
 
         if (!ramblock) {
             /*
@@ -1213,7 +1255,7 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
             error_report("ram_save_queue_pages no block '%s'", rbname);
             goto err;
         }
-        ms->last_req_rb = ramblock;
+        rs->last_req_rb = ramblock;
     }
     trace_ram_save_queue_pages(ramblock->idstr, start, len);
     if (start+len > ramblock->used_length) {
@@ -1223,16 +1265,16 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
         goto err;
     }
 
-    struct MigrationSrcPageRequest *new_entry =
-        g_malloc0(sizeof(struct MigrationSrcPageRequest));
+    struct RAMSrcPageRequest *new_entry =
+        g_malloc0(sizeof(struct RAMSrcPageRequest));
     new_entry->rb = ramblock;
     new_entry->offset = start;
     new_entry->len = len;
 
     memory_region_ref(ramblock->mr);
-    qemu_mutex_lock(&ms->src_page_req_mutex);
-    QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
-    qemu_mutex_unlock(&ms->src_page_req_mutex);
+    qemu_mutex_lock(&rs->src_page_req_mutex);
+    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
+    qemu_mutex_unlock(&rs->src_page_req_mutex);
     rcu_read_unlock();
 
     return 0;
@@ -1243,51 +1285,43 @@ err:
 }
 
 /**
- * ram_save_target_page: Save one target page
+ * ram_save_target_page: save one target page
  *
+ * Returns the number of pages written
  *
- * @f: QEMUFile where to send the data
- * @block: pointer to block that contains the page we want to send
- * @offset: offset inside the block for the page;
+ * @rs: current RAM state
+ * @ms: current migration state
+ * @pss: data about the page we want to send
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
- *
- * Returns: Number of pages written.
  */
-static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
-                                PageSearchStatus *pss,
-                                bool last_stage,
-                                uint64_t *bytes_transferred,
-                                ram_addr_t dirty_ram_abs)
+static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
+                                bool last_stage)
 {
     int res = 0;
 
     /* Check the pages is dirty and if it is send it */
-    if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
+    if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
         unsigned long *unsentmap;
-        if (compression_switch && migrate_use_compression()) {
-            res = ram_save_compressed_page(ms, f, pss,
-                                           last_stage,
-                                           bytes_transferred);
+        /*
+         * If xbzrle is on, stop using the data compression after first
+         * round of migration even if compression is enabled. In theory,
+         * xbzrle can do better than compression.
+         */
+        unsigned long page =
+            (pss->block->offset >> TARGET_PAGE_BITS) + pss->page;
+        if (migrate_use_compression()
+            && (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
+            res = ram_save_compressed_page(rs, pss, last_stage);
         } else {
-            res = ram_save_page(ms, f, pss, last_stage,
-                                bytes_transferred);
+            res = ram_save_page(rs, pss, last_stage);
         }
 
         if (res < 0) {
             return res;
         }
-        unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+        unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
         if (unsentmap) {
-            clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
-        }
-        /* Only update last_sent_block if a block was actually sent; xbzrle
-         * might have decided the page was identical so didn't bother writing
-         * to the stream.
-         */
-        if (res > 0) {
-            last_sent_block = pss->block;
+            clear_bit(page, unsentmap);
         }
     }
 
@@ -1295,83 +1329,70 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
 }
 
 /**
- * ram_save_host_page: Starting at *offset send pages up to the end
- *                     of the current host page.  It's valid for the initial
- *                     offset to point into the middle of a host page
- *                     in which case the remainder of the hostpage is sent.
- *                     Only dirty target pages are sent.
- *                     Note that the host page size may be a huge page for this
- *                     block.
+ * ram_save_host_page: save a whole host page
  *
- * Returns: Number of pages written.
+ * Starting at *offset send pages up to the end of the current host
+ * page. It's valid for the initial offset to point into the middle of
+ * a host page in which case the remainder of the hostpage is sent.
+ * Only dirty target pages are sent. Note that the host page size may
+ * be a huge page for this block.
  *
- * @f: QEMUFile where to send the data
- * @block: pointer to block that contains the page we want to send
- * @offset: offset inside the block for the page; updated to last target page
- *          sent
+ * Returns the number of pages written or negative on error
+ *
+ * @rs: current RAM state
+ * @ms: current migration state
+ * @pss: data about the page we want to send
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
- * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
  */
-static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
-                              PageSearchStatus *pss,
-                              bool last_stage,
-                              uint64_t *bytes_transferred,
-                              ram_addr_t dirty_ram_abs)
+static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
+                              bool last_stage)
 {
     int tmppages, pages = 0;
-    size_t pagesize = qemu_ram_pagesize(pss->block);
+    size_t pagesize_bits =
+        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
 
     do {
-        tmppages = ram_save_target_page(ms, f, pss, last_stage,
-                                        bytes_transferred, dirty_ram_abs);
+        tmppages = ram_save_target_page(rs, pss, last_stage);
         if (tmppages < 0) {
             return tmppages;
         }
 
         pages += tmppages;
-        pss->offset += TARGET_PAGE_SIZE;
-        dirty_ram_abs += TARGET_PAGE_SIZE;
-    } while (pss->offset & (pagesize - 1));
+        pss->page++;
+    } while (pss->page & (pagesize_bits - 1));
 
     /* The offset we leave with is the last one we looked at */
-    pss->offset -= TARGET_PAGE_SIZE;
+    pss->page--;
     return pages;
 }
 
 /**
- * ram_find_and_save_block: Finds a dirty page and sends it to f
+ * ram_find_and_save_block: finds a dirty page and sends it to f
  *
  * Called within an RCU critical section.
  *
- * Returns:  The number of pages written
- *           0 means no dirty pages
+ * Returns the number of pages written where zero means no dirty pages
  *
- * @f: QEMUFile where to send the data
+ * @rs: current RAM state
  * @last_stage: if we are at the completion stage
- * @bytes_transferred: increase it with the number of transferred bytes
  *
  * On systems where host-page-size > target-page-size it will send all the
  * pages in a host page that are dirty.
  */
 
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
-                                   uint64_t *bytes_transferred)
+static int ram_find_and_save_block(RAMState *rs, bool last_stage)
 {
     PageSearchStatus pss;
-    MigrationState *ms = migrate_get_current();
     int pages = 0;
     bool again, found;
-    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
-                                 ram_addr_t space */
 
     /* No dirty page as there is zero RAM */
     if (!ram_bytes_total()) {
         return pages;
     }
 
-    pss.block = last_seen_block;
-    pss.offset = last_offset;
+    pss.block = rs->last_seen_block;
+    pss.page = rs->last_page;
     pss.complete_round = false;
 
     if (!pss.block) {
@@ -1380,22 +1401,20 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
 
     do {
         again = true;
-        found = get_queued_page(ms, &pss, &dirty_ram_abs);
+        found = get_queued_page(rs, &pss);
 
         if (!found) {
             /* priority queue empty, so just search for something dirty */
-            found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
+            found = find_dirty_block(rs, &pss, &again);
         }
 
         if (found) {
-            pages = ram_save_host_page(ms, f, &pss,
-                                       last_stage, bytes_transferred,
-                                       dirty_ram_abs);
+            pages = ram_save_host_page(rs, &pss, last_stage);
         }
     } while (!pages && again);
 
-    last_seen_block = pss.block;
-    last_offset = pss.offset;
+    rs->last_seen_block = pss.block;
+    rs->last_page = pss.page;
 
     return pages;
 }
@@ -1403,30 +1422,17 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
 void acct_update_position(QEMUFile *f, size_t size, bool zero)
 {
     uint64_t pages = size / TARGET_PAGE_SIZE;
+    RAMState *rs = &ram_state;
+
     if (zero) {
-        acct_info.dup_pages += pages;
+        rs->zero_pages += pages;
     } else {
-        acct_info.norm_pages += pages;
-        bytes_transferred += size;
+        rs->norm_pages += pages;
+        rs->bytes_transferred += size;
         qemu_update_position(f, size);
     }
 }
 
-static ram_addr_t ram_save_remaining(void)
-{
-    return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
 uint64_t ram_bytes_total(void)
 {
     RAMBlock *block;
@@ -1445,7 +1451,7 @@ void free_xbzrle_decoded_buf(void)
     xbzrle_decoded_buf = NULL;
 }
 
-static void migration_bitmap_free(struct BitmapRcu *bmap)
+static void migration_bitmap_free(RAMBitmap *bmap)
 {
     g_free(bmap->bmap);
     g_free(bmap->unsentmap);
@@ -1454,11 +1460,13 @@ static void migration_bitmap_free(struct BitmapRcu *bmap)
 
 static void ram_migration_cleanup(void *opaque)
 {
+    RAMState *rs = opaque;
+
     /* caller have hold iothread lock or is in a bh, so there is
      * no writing race against this migration_bitmap
      */
-    struct BitmapRcu *bitmap = migration_bitmap_rcu;
-    atomic_rcu_set(&migration_bitmap_rcu, NULL);
+    RAMBitmap *bitmap = rs->ram_bitmap;
+    atomic_rcu_set(&rs->ram_bitmap, NULL);
     if (bitmap) {
         memory_global_dirty_log_stop();
         call_rcu(bitmap, migration_bitmap_free, rcu);
@@ -1477,49 +1485,17 @@ static void ram_migration_cleanup(void *opaque)
     XBZRLE_cache_unlock();
 }
 
-static void reset_ram_globals(void)
+static void ram_state_reset(RAMState *rs)
 {
-    last_seen_block = NULL;
-    last_sent_block = NULL;
-    last_offset = 0;
-    last_version = ram_list.version;
-    ram_bulk_stage = true;
+    rs->last_seen_block = NULL;
+    rs->last_sent_block = NULL;
+    rs->last_page = 0;
+    rs->last_version = ram_list.version;
+    rs->ram_bulk_stage = true;
 }
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
-void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
-{
-    /* called in qemu main thread, so there is
-     * no writing race against this migration_bitmap
-     */
-    if (migration_bitmap_rcu) {
-        struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
-        bitmap = g_new(struct BitmapRcu, 1);
-        bitmap->bmap = bitmap_new(new);
-
-        /* prevent migration_bitmap content from being set bit
-         * by migration_bitmap_sync_range() at the same time.
-         * it is safe to migration if migration_bitmap is cleared bit
-         * at the same time.
-         */
-        qemu_mutex_lock(&migration_bitmap_mutex);
-        bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
-        bitmap_set(bitmap->bmap, old, new - old);
-
-        /* We don't have a way to safely extend the sentmap
-         * with RCU; so mark it as missing, entry to postcopy
-         * will fail.
-         */
-        bitmap->unsentmap = NULL;
-
-        atomic_rcu_set(&migration_bitmap_rcu, bitmap);
-        qemu_mutex_unlock(&migration_bitmap_mutex);
-        migration_dirty_pages += new - old;
-        call_rcu(old_bitmap, migration_bitmap_free, rcu);
-    }
-}
-
 /*
  * 'expected' is the value you expect the bitmap mostly to be full
  * of; it won't bother printing lines that are all this value.
@@ -1527,14 +1503,14 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
  */
 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
 {
-    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-
+    unsigned long ram_pages = last_ram_page();
+    RAMState *rs = &ram_state;
     int64_t cur;
     int64_t linelen = 128;
     char linebuf[129];
 
     if (!todump) {
-        todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+        todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
     }
 
     for (cur = 0; cur < ram_pages; cur += linelen) {
@@ -1563,8 +1539,9 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
 
 void ram_postcopy_migrated_memory_release(MigrationState *ms)
 {
+    RAMState *rs = &ram_state;
     struct RAMBlock *block;
-    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+    unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         unsigned long first = block->offset >> TARGET_PAGE_BITS;
@@ -1573,30 +1550,38 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
 
         while (run_start < range) {
             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
-            ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
+            ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
                               (run_end - run_start) << TARGET_PAGE_BITS);
             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
         }
     }
 }
 
-/*
+/**
+ * postcopy_send_discard_bm_ram: discard a RAMBlock
+ *
+ * Returns zero on success
+ *
  * Callback from postcopy_each_ram_send_discard for each RAMBlock
  * Note: At this point the 'unsentmap' is the processed bitmap combined
  *       with the dirtymap; so a '1' means it's either dirty or unsent.
- * start,length: Indexes into the bitmap for the first bit
- *            representing the named block and length in target-pages
+ *
+ * @ms: current migration state
+ * @pds: state for postcopy
+ * @start: RAMBlock starting page
+ * @length: RAMBlock size
  */
 static int postcopy_send_discard_bm_ram(MigrationState *ms,
                                         PostcopyDiscardState *pds,
                                         unsigned long start,
                                         unsigned long length)
 {
+    RAMState *rs = &ram_state;
     unsigned long end = start + length; /* one after the end */
     unsigned long current;
     unsigned long *unsentmap;
 
-    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+    unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
     for (current = start; current < end; ) {
         unsigned long one = find_next_bit(unsentmap, end, current);
 
@@ -1621,13 +1606,18 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms,
     return 0;
 }
 
-/*
+/**
+ * postcopy_each_ram_send_discard: discard all RAMBlocks
+ *
+ * Returns 0 for success or negative for error
+ *
  * Utility for the outgoing postcopy code.
  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
  *   passing it bitmap indexes and name.
- * Returns: 0 on success
  * (qemu_ram_foreach_block ends up passing unscaled lengths
  *  which would mean postcopy code would have to deal with target page)
+ *
+ * @ms: current migration state
  */
 static int postcopy_each_ram_send_discard(MigrationState *ms)
 {
@@ -1656,22 +1646,27 @@ static int postcopy_each_ram_send_discard(MigrationState *ms)
     return 0;
 }
 
-/*
- * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
- *   the two bitmaps, that are similar, but one is inverted.
+/**
+ * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
+ *
+ * Helper for postcopy_chunk_hostpages; it's called twice to
+ * canonicalize the two bitmaps, that are similar, but one is
+ * inverted.
  *
- * We search for runs of target-pages that don't start or end on a
- * host page boundary;
- * unsent_pass=true: Cleans up partially unsent host pages by searching
- *                 the unsentmap
- * unsent_pass=false: Cleans up partially dirty host pages by searching
- *                 the main migration bitmap
+ * Postcopy requires that all target pages in a hostpage are dirty or
+ * clean, not a mix.  This function canonicalizes the bitmaps.
  *
+ * @ms: current migration state
+ * @unsent_pass: if true we need to canonicalize partially unsent host pages
+ *               otherwise we need to canonicalize partially dirty host pages
+ * @block: block that contains the page we want to canonicalize
+ * @pds: state for postcopy
  */
 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                           RAMBlock *block,
                                           PostcopyDiscardState *pds)
 {
+    RAMState *rs = &ram_state;
     unsigned long *bitmap;
     unsigned long *unsentmap;
     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
@@ -1685,8 +1680,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
         return;
     }
 
-    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+    unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
 
     if (unsent_pass) {
         /* Find a sent page */
@@ -1769,7 +1764,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                  * Remark them as dirty, updating the count for any pages
                  * that weren't previously dirty.
                  */
-                migration_dirty_pages += !test_and_set_bit(page, bitmap);
+                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
             }
         }
 
@@ -1784,23 +1779,28 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
     }
 }
 
-/*
+/**
+ * postcopy_chuck_hostpages: discrad any partially sent host page
+ *
  * Utility for the outgoing postcopy code.
  *
  * Discard any partially sent host-page size chunks, mark any partially
  * dirty host-page size chunks as all dirty.  In this case the host-page
  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
  *
- * Returns: 0 on success
+ * Returns zero on success
+ *
+ * @ms: current migration state
  */
 static int postcopy_chunk_hostpages(MigrationState *ms)
 {
+    RAMState *rs = &ram_state;
     struct RAMBlock *block;
 
     /* Easiest way to make sure we don't resume in the middle of a host-page */
-    last_seen_block = NULL;
-    last_sent_block = NULL;
-    last_offset     = 0;
+    rs->last_seen_block = NULL;
+    rs->last_sent_block = NULL;
+    rs->last_page = 0;
 
     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         unsigned long first = block->offset >> TARGET_PAGE_BITS;
@@ -1822,7 +1822,11 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
     return 0;
 }
 
-/*
+/**
+ * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
+ *
+ * Returns zero on success
+ *
  * Transmit the set of pages to be discarded after precopy to the target
  * these are pages that:
  *     a) Have been previously transmitted but are now dirty again
@@ -1830,18 +1834,21 @@ static int postcopy_chunk_hostpages(MigrationState *ms)
  *        any pages on the destination that have been mapped by background
  *        tasks get discarded (transparent huge pages is the specific concern)
  * Hopefully this is pretty sparse
+ *
+ * @ms: current migration state
  */
 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
 {
+    RAMState *rs = &ram_state;
     int ret;
     unsigned long *bitmap, *unsentmap;
 
     rcu_read_lock();
 
     /* This should be our last sync, the src is now paused */
-    migration_bitmap_sync();
+    migration_bitmap_sync(rs);
 
-    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+    unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
     if (!unsentmap) {
         /* We don't have a safe way to resize the sentmap, so
          * if the bitmap was resized it will be NULL at this
@@ -1862,9 +1869,8 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     /*
      * Update the unsentmap to be unsentmap = unsentmap | dirty
      */
-    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
-    bitmap_or(unsentmap, unsentmap, bitmap,
-               last_ram_offset() >> TARGET_PAGE_BITS);
+    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
+    bitmap_or(unsentmap, unsentmap, bitmap, last_ram_page());
 
 
     trace_ram_postcopy_send_discard_bitmap();
@@ -1878,28 +1884,27 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms)
     return ret;
 }
 
-/*
- * At the start of the postcopy phase of migration, any now-dirty
- * precopied pages are discarded.
+/**
+ * ram_discard_range: discard dirtied pages at the beginning of postcopy
  *
- * start, length describe a byte address range within the RAMBlock
+ * Returns zero on success
  *
- * Returns 0 on success.
+ * @rbname: name of the RAMBlock of the request. NULL means the
+ *          same that last one.
+ * @start: RAMBlock starting page
+ * @length: RAMBlock size
  */
-int ram_discard_range(MigrationIncomingState *mis,
-                      const char *block_name,
-                      uint64_t start, size_t length)
+int ram_discard_range(const char *rbname, uint64_t start, size_t length)
 {
     int ret = -1;
 
-    trace_ram_discard_range(block_name, start, length);
+    trace_ram_discard_range(rbname, start, length);
 
     rcu_read_lock();
-    RAMBlock *rb = qemu_ram_block_by_name(block_name);
+    RAMBlock *rb = qemu_ram_block_by_name(rbname);
 
     if (!rb) {
-        error_report("ram_discard_range: Failed to find block '%s'",
-                     block_name);
+        error_report("ram_discard_range: Failed to find block '%s'", rbname);
         goto err;
     }
 
@@ -1911,14 +1916,14 @@ err:
     return ret;
 }
 
-static int ram_save_init_globals(void)
+static int ram_state_init(RAMState *rs)
 {
-    int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
+    unsigned long ram_bitmap_pages;
 
-    dirty_rate_high_cnt = 0;
-    bitmap_sync_count = 0;
-    migration_bitmap_sync_init();
-    qemu_mutex_init(&migration_bitmap_mutex);
+    memset(rs, 0, sizeof(*rs));
+    qemu_mutex_init(&rs->bitmap_mutex);
+    qemu_mutex_init(&rs->src_page_req_mutex);
+    QSIMPLEQ_INIT(&rs->src_page_requests);
 
     if (migrate_use_xbzrle()) {
         XBZRLE_cache_lock();
@@ -1947,8 +1952,6 @@ static int ram_save_init_globals(void)
             XBZRLE.encoded_buf = NULL;
             return -1;
         }
-
-        acct_clear();
     }
 
     /* For memory_global_dirty_log_start below.  */
@@ -1956,19 +1959,18 @@ static int ram_save_init_globals(void)
 
     qemu_mutex_lock_ramlist();
     rcu_read_lock();
-    bytes_transferred = 0;
-    reset_ram_globals();
+    ram_state_reset(rs);
 
-    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
+    rs->ram_bitmap = g_new0(RAMBitmap, 1);
     /* Skip setting bitmap if there is no RAM */
     if (ram_bytes_total()) {
-        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
-        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
+        ram_bitmap_pages = last_ram_page();
+        rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
+        bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
 
         if (migrate_postcopy_ram()) {
-            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
-            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
+            rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
+            bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
         }
     }
 
@@ -1976,10 +1978,10 @@ static int ram_save_init_globals(void)
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
      */
-    migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+    rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
 
     memory_global_dirty_log_start();
-    migration_bitmap_sync();
+    migration_bitmap_sync(rs);
     qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
     rcu_read_unlock();
@@ -1987,22 +1989,33 @@ static int ram_save_init_globals(void)
     return 0;
 }
 
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+/*
+ * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
  * start to become numerous it will be necessary to reduce the
  * granularity of these critical sections.
  */
 
+/**
+ * ram_save_setup: Setup RAM for migration
+ *
+ * Returns zero to indicate success and negative for error
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: RAMState pointer
+ */
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
+    RAMState *rs = opaque;
     RAMBlock *block;
 
     /* migration has already setup the bitmap, reuse it. */
     if (!migration_in_colo_state()) {
-        if (ram_save_init_globals() < 0) {
+        if (ram_state_init(rs) < 0) {
             return -1;
          }
     }
+    rs->f = f;
 
     rcu_read_lock();
 
@@ -2027,16 +2040,25 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     return 0;
 }
 
+/**
+ * ram_save_iterate: iterative stage for migration
+ *
+ * Returns zero to indicate success and negative for error
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: RAMState pointer
+ */
 static int ram_save_iterate(QEMUFile *f, void *opaque)
 {
+    RAMState *rs = opaque;
     int ret;
     int i;
     int64_t t0;
     int done = 0;
 
     rcu_read_lock();
-    if (ram_list.version != last_version) {
-        reset_ram_globals();
+    if (ram_list.version != rs->last_version) {
+        ram_state_reset(rs);
     }
 
     /* Read version before ram_list.blocks */
@@ -2049,13 +2071,13 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int pages;
 
-        pages = ram_find_and_save_block(f, false, &bytes_transferred);
+        pages = ram_find_and_save_block(rs, false);
         /* no more pages to sent */
         if (pages == 0) {
             done = 1;
             break;
         }
-        acct_info.iterations++;
+        rs->iterations++;
 
         /* we want to check in the 1st loop, just in case it was the 1st time
            and we had to sync the dirty bitmap.
@@ -2071,7 +2093,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         i++;
     }
-    flush_compressed_data(f);
+    flush_compressed_data(rs);
     rcu_read_unlock();
 
     /*
@@ -2081,7 +2103,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    bytes_transferred += 8;
+    rs->bytes_transferred += 8;
 
     ret = qemu_file_get_error(f);
     if (ret < 0) {
@@ -2091,13 +2113,24 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     return done;
 }
 
-/* Called with iothread lock */
+/**
+ * ram_save_complete: function called to send the remaining amount of ram
+ *
+ * Returns zero to indicate success
+ *
+ * Called with iothread lock
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: RAMState pointer
+ */
 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
+    RAMState *rs = opaque;
+
     rcu_read_lock();
 
-    if (!migration_in_postcopy(migrate_get_current())) {
-        migration_bitmap_sync();
+    if (!migration_in_postcopy()) {
+        migration_bitmap_sync(rs);
     }
 
     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
@@ -2108,15 +2141,14 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     while (true) {
         int pages;
 
-        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
-                                        &bytes_transferred);
+        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
         /* no more blocks to sent */
         if (pages == 0) {
             break;
         }
     }
 
-    flush_compressed_data(f);
+    flush_compressed_data(rs);
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
 
     rcu_read_unlock();
@@ -2130,18 +2162,19 @@ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                              uint64_t *non_postcopiable_pending,
                              uint64_t *postcopiable_pending)
 {
+    RAMState *rs = opaque;
     uint64_t remaining_size;
 
-    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
 
-    if (!migration_in_postcopy(migrate_get_current()) &&
+    if (!migration_in_postcopy() &&
         remaining_size < max_size) {
         qemu_mutex_lock_iothread();
         rcu_read_lock();
-        migration_bitmap_sync();
+        migration_bitmap_sync(rs);
         rcu_read_unlock();
         qemu_mutex_unlock_iothread();
-        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
     }
 
     /* We can do postcopy, and all the data is postcopiable */
@@ -2185,17 +2218,17 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
     return 0;
 }
 
-/* Must be called from within a rcu critical section.
+/**
+ * ram_block_from_stream: read a RAMBlock id from the migration stream
+ *
+ * Must be called from within a rcu critical section.
+ *
  * Returns a pointer from within the RCU-protected ram_list.
- */
-/*
- * Read a RAMBlock ID from the stream f.
  *
- * f: Stream to read from
- * flags: Page flags (mostly to see if it's a continuation of previous block)
+ * @f: QEMUFile where to read the data from
+ * @flags: Page flags (mostly to see if it's a continuation of previous block)
  */
-static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
-                                              int flags)
+static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
 {
     static RAMBlock *block = NULL;
     char id[256];
@@ -2232,9 +2265,15 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
     return block->host + offset;
 }
 
-/*
+/**
+ * ram_handle_compressed: handle the zero page case
+ *
  * If a page (or a whole RDMA chunk) has been
  * determined to be zero, then zap it.
+ *
+ * @host: host address for the zero page
+ * @ch: what the page is filled from.  We only support zero
+ * @size: size of the zero page
  */
 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 {
@@ -2373,20 +2412,33 @@ static void decompress_data_with_multi_threads(QEMUFile *f,
     qemu_mutex_unlock(&decomp_done_lock);
 }
 
-/*
- * Allocate data structures etc needed by incoming migration with postcopy-ram
- * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
+/**
+ * ram_postcopy_incoming_init: allocate postcopy data structures
+ *
+ * Returns 0 for success and negative if there was one error
+ *
+ * @mis: current migration incoming state
+ *
+ * Allocate data structures etc needed by incoming migration with
+ * postcopy-ram. postcopy-ram's similarly names
+ * postcopy_ram_incoming_init does the work.
  */
 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
 {
-    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+    unsigned long ram_pages = last_ram_page();
 
     return postcopy_ram_incoming_init(mis, ram_pages);
 }
 
-/*
+/**
+ * ram_load_postcopy: load a page in postcopy case
+ *
+ * Returns 0 for success or -errno in case of error
+ *
  * Called in postcopy mode by ram_load().
  * rcu_read_lock is taken prior to this being called.
+ *
+ * @f: QEMUFile where to send the data
  */
 static int ram_load_postcopy(QEMUFile *f)
 {
@@ -2673,5 +2725,5 @@ static SaveVMHandlers savevm_ram_handlers = {
 void ram_mig_init(void)
 {
     qemu_mutex_init(&XBZRLE.lock);
-    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
+    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
 }
diff --git a/migration/rdma.c b/migration/rdma.c
index 674ccab12e..fe0a4b5a83 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -809,7 +809,7 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
  *
  * Patches are being reviewed on linux-rdma.
  */
-static int qemu_rdma_broken_ipv6_kernel(Error **errp, struct ibv_context *verbs)
+static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
 {
     struct ibv_port_attr port_attr;
 
@@ -950,7 +950,7 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
                 RDMA_RESOLVE_TIMEOUT_MS);
         if (!ret) {
             if (e->ai_family == AF_INET6) {
-                ret = qemu_rdma_broken_ipv6_kernel(errp, rdma->cm_id->verbs);
+                ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, errp);
                 if (ret) {
                     continue;
                 }
@@ -2277,7 +2277,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 }
 
 
-static int qemu_rdma_source_init(RDMAContext *rdma, Error **errp, bool pin_all)
+static int qemu_rdma_source_init(RDMAContext *rdma, bool pin_all, Error **errp)
 {
     int ret, idx;
     Error *local_err = NULL, **temp = &local_err;
@@ -2469,7 +2469,7 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
             continue;
         }
         if (e->ai_family == AF_INET6) {
-            ret = qemu_rdma_broken_ipv6_kernel(errp, listen_id->verbs);
+            ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, errp);
             if (ret) {
                 continue;
             }
@@ -3676,8 +3676,8 @@ void rdma_start_outgoing_migration(void *opaque,
         goto err;
     }
 
-    ret = qemu_rdma_source_init(rdma, errp,
-        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]);
+    ret = qemu_rdma_source_init(rdma,
+        s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
 
     if (ret) {
         goto err;
diff --git a/migration/savevm.c b/migration/savevm.c
index 3b19a4a274..03ae1bdeb4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -871,7 +871,7 @@ void qemu_savevm_send_postcopy_advise(QEMUFile *f)
 {
     uint64_t tmp[2];
     tmp[0] = cpu_to_be64(ram_pagesize_summary());
-    tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits());
+    tmp[1] = cpu_to_be64(qemu_target_page_size());
 
     trace_qemu_savevm_send_postcopy_advise();
     qemu_savevm_command_send(f, MIG_CMD_POSTCOPY_ADVISE, 16, (uint8_t *)tmp);
@@ -1062,7 +1062,7 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 static bool should_send_vmdesc(void)
 {
     MachineState *machine = MACHINE(qdev_get_machine());
-    bool in_postcopy = migration_in_postcopy(migrate_get_current());
+    bool in_postcopy = migration_in_postcopy();
     return !machine->suppress_vmdesc && !in_postcopy;
 }
 
@@ -1111,7 +1111,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
     int vmdesc_len;
     SaveStateEntry *se;
     int ret;
-    bool in_postcopy = migration_in_postcopy(migrate_get_current());
+    bool in_postcopy = migration_in_postcopy();
 
     trace_savevm_state_complete_precopy();
 
@@ -1197,7 +1197,7 @@ void qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only)
  * the result is split into the amount for units that can and
  * for units that can't do postcopy.
  */
-void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
+void qemu_savevm_state_pending(QEMUFile *f, uint64_t threshold_size,
                                uint64_t *res_non_postcopiable,
                                uint64_t *res_postcopiable)
 {
@@ -1216,7 +1216,7 @@ void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
                 continue;
             }
         }
-        se->ops->save_live_pending(f, se->opaque, max_size,
+        se->ops->save_live_pending(f, se->opaque, threshold_size,
                                    res_non_postcopiable, res_postcopiable);
     }
 }
@@ -1390,13 +1390,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
     }
 
     remote_tps = qemu_get_be64(mis->from_src_file);
-    if (remote_tps != (1ul << qemu_target_page_bits())) {
+    if (remote_tps != qemu_target_page_size()) {
         /*
          * Again, some differences could be dealt with, but for now keep it
          * simple.
          */
-        error_report("Postcopy needs matching target page sizes (s=%d d=%d)",
-                     (int)remote_tps, 1 << qemu_target_page_bits());
+        error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
+                     (int)remote_tps, qemu_target_page_size());
         return -1;
     }
 
@@ -1479,8 +1479,7 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
         block_length = qemu_get_be64(mis->from_src_file);
 
         len -= 16;
-        int ret = ram_discard_range(mis, ramid, start_addr,
-                                    block_length);
+        int ret = ram_discard_range(ramid, start_addr, block_length);
         if (ret) {
             return ret;
         }
diff --git a/migration/trace-events b/migration/trace-events
index 7372ce2a51..b8f01a218c 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -63,8 +63,8 @@ put_qtailq_end(const char *name, const char *reason) "%s %s"
 qemu_file_fclose(void) ""
 
 # migration/ram.c
-get_queued_page(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr) "%s/%" PRIx64 " ram_addr=%" PRIx64
-get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t ram_addr, int sent) "%s/%" PRIx64 " ram_addr=%" PRIx64 " (sent=%d)"
+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/%" PRIx64 " page_abs=%lx"
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs, int sent) "%s/%" PRIx64 " page_abs=%lx (sent=%d)"
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_throttle(void) ""
diff --git a/nbd/client.c b/nbd/client.c
index 3dc2564cd0..a58fb02cb4 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -812,6 +812,6 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
         LOG("invalid magic (got 0x%" PRIx32 ")", magic);
         return -EINVAL;
     }
-    return 0;
+    return sizeof(buf);
 }
 
diff --git a/net/colo-compare.c b/net/colo-compare.c
index 54e6d40525..03ddebe5d3 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -233,24 +233,54 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
         spkt->ip->ip_sum = ppkt->ip->ip_sum;
     }
 
-    if (ptcp->th_sum == stcp->th_sum) {
+    /*
+     * Check tcp header length for tcp option field.
+     * th_off > 5 means this tcp packet have options field.
+     * The tcp options maybe always different.
+     * for example:
+     * From RFC 7323.
+     * TCP Timestamps option (TSopt):
+     * Kind: 8
+     *
+     * Length: 10 bytes
+     *
+     *    +-------+-------+---------------------+---------------------+
+     *    |Kind=8 |  10   |   TS Value (TSval)  |TS Echo Reply (TSecr)|
+     *    +-------+-------+---------------------+---------------------+
+     *       1       1              4                     4
+     *
+     * In this case the primary guest's timestamp always different with
+     * the secondary guest's timestamp. COLO just focus on payload,
+     * so we just need skip this field.
+     */
+    if (ptcp->th_off > 5) {
+        ptrdiff_t tcp_offset;
+        tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
+                     + (ptcp->th_off * 4);
+        res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
+    } else if (ptcp->th_sum == stcp->th_sum) {
         res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
     } else {
         res = -1;
     }
 
-    if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
-        trace_colo_compare_pkt_info_src(inet_ntoa(ppkt->ip->ip_src),
-                                        ntohl(stcp->th_seq),
-                                        ntohl(stcp->th_ack),
-                                        res, stcp->th_flags,
-                                        spkt->size);
+    if (res && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
+        char ip_src[20], ip_dst[20];
 
-        trace_colo_compare_pkt_info_dst(inet_ntoa(ppkt->ip->ip_dst),
-                                        ntohl(ptcp->th_seq),
-                                        ntohl(ptcp->th_ack),
-                                        res, ptcp->th_flags,
-                                        ppkt->size);
+        strcpy(ip_src, inet_ntoa(ppkt->ip->ip_src));
+        strcpy(ip_dst, inet_ntoa(ppkt->ip->ip_dst));
+
+        trace_colo_compare_tcp_info(ip_src,
+                                    ip_dst,
+                                    ntohl(ptcp->th_seq),
+                                    ntohl(stcp->th_seq),
+                                    ntohl(ptcp->th_ack),
+                                    ntohl(stcp->th_ack),
+                                    res,
+                                    ptcp->th_flags,
+                                    stcp->th_flags,
+                                    ppkt->size,
+                                    spkt->size);
 
         qemu_hexdump((char *)ppkt->data, stderr,
                      "colo-compare ppkt", ppkt->size);
@@ -372,10 +402,9 @@ static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
     }
 }
 
-static void colo_old_packet_check_one_conn(void *opaque,
-                                           void *user_data)
+static int colo_old_packet_check_one_conn(Connection *conn,
+                                          void *user_data)
 {
-    Connection *conn = opaque;
     GList *result = NULL;
     int64_t check_time = REGULAR_PACKET_CHECK_MS;
 
@@ -386,7 +415,10 @@ static void colo_old_packet_check_one_conn(void *opaque,
     if (result) {
         /* do checkpoint will flush old packet */
         /* TODO: colo_notify_checkpoint();*/
+        return 0;
     }
+
+    return 1;
 }
 
 /*
@@ -398,7 +430,12 @@ static void colo_old_packet_check(void *opaque)
 {
     CompareState *s = opaque;
 
-    g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL);
+    /*
+     * If we find one old packet, stop finding job and notify
+     * COLO frame do checkpoint.
+     */
+    g_queue_find_custom(&s->conn_list, NULL,
+                        (GCompareFunc)colo_old_packet_check_one_conn);
 }
 
 /*
diff --git a/net/socket.c b/net/socket.c
index fe3547b018..b8c931e762 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -578,7 +578,7 @@ static int net_socket_connect_init(NetClientState *peer,
         goto err;
     }
 
-    fd = socket_connect(c->saddr, &local_error, net_socket_connected, c);
+    fd = socket_connect(c->saddr, net_socket_connected, c, &local_error);
     if (fd < 0) {
         goto err;
     }
diff --git a/net/trace-events b/net/trace-events
index 35198bc742..123cb28c63 100644
--- a/net/trace-events
+++ b/net/trace-events
@@ -13,8 +13,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
 colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
 colo_old_packet_check_found(int64_t old_time) "%" PRId64
 colo_compare_miscompare(void) ""
-colo_compare_pkt_info_src(const char *src, uint32_t sseq, uint32_t sack, int res, uint32_t sflag, int ssize) "src/dst: %s s: seq/ack=%u/%u res=%d flags=%x spkt_size: %d\n"
-colo_compare_pkt_info_dst(const char *dst, uint32_t dseq, uint32_t dack, int res, uint32_t dflag, int dsize) "src/dst: %s d: seq/ack=%u/%u res=%d flags=%x dpkt_size: %d\n"
+colo_compare_tcp_info(const char *src, const char *dst, uint32_t pseq, uint32_t sseq, uint32_t pack, uint32_t sack, int res, uint32_t pflag, uint32_t sflag, int psize, int ssize) "src/dst: %s/%s pseq/sseq:%u/%u pack/sack:%u/%u res=%d pflags/sflag:%x/%x psize/ssize:%d/%d \n"
 
 # net/filter-rewriter.c
 colo_filter_rewriter_debug(void) ""
diff --git a/numa.c b/numa.c
index e01cb547a2..6fc2393ddd 100644
--- a/numa.c
+++ b/numa.c
@@ -338,12 +338,12 @@ void parse_numa_opts(MachineClass *mc)
         if (i == nb_numa_nodes) {
             uint64_t usedmem = 0;
 
-            /* On Linux, each node's border has to be 8MB aligned,
-             * the final node gets the rest.
+            /* Align each node according to the alignment
+             * requirements of the machine class
              */
             for (i = 0; i < nb_numa_nodes - 1; i++) {
                 numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
-                                        ~((1 << 23UL) - 1);
+                                        ~((1 << mc->numa_mem_align_shift) - 1);
                 usedmem += numa_info[i].node_mem;
             }
             numa_info[i].node_mem = ram_size - usedmem;
diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc
index cfe0aee7e5..ed9a51e738 100644
--- a/pc-bios/openbios-ppc
+++ b/pc-bios/openbios-ppc
diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32
index d6244b9ebd..1cf43dbe1d 100644
--- a/pc-bios/openbios-sparc32
+++ b/pc-bios/openbios-sparc32
diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64
index 74f67f2724..281d20d604 100644
--- a/pc-bios/openbios-sparc64
+++ b/pc-bios/openbios-sparc64
diff --git a/qapi-schema.json b/qapi-schema.json
index 68a43274bf..01b087fa16 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -598,6 +598,9 @@
 # @postcopy-requests: The number of page requests received from the destination
 #        (since 2.7)
 #
+# @page-size: The number of bytes per page for the various page-based
+#        statistics (since 2.10)
+#
 # Since: 0.14.0
 ##
 { 'struct': 'MigrationStats',
@@ -605,7 +608,7 @@
            'duplicate': 'int', 'skipped': 'int', 'normal': 'int',
            'normal-bytes': 'int', 'dirty-pages-rate' : 'int',
            'mbps' : 'number', 'dirty-sync-count' : 'int',
-           'postcopy-requests' : 'int' } }
+           'postcopy-requests' : 'int', 'page-size' : 'int' } }
 
 ##
 # @XBZRLECacheStats:
@@ -4051,19 +4054,27 @@
   'data': [ 'all', 'rx', 'tx' ] }
 
 ##
-# @InetSocketAddress:
-#
-# Captures a socket address or address range in the Internet namespace.
+# @InetSocketAddressBase:
 #
 # @host: host part of the address
+# @port: port part of the address
+##
+{ 'struct': 'InetSocketAddressBase',
+  'data': {
+    'host': 'str',
+    'port': 'str' } }
+
+##
+# @InetSocketAddress:
 #
-# @port: port part of the address, or lowest port if @to is present
+# Captures a socket address or address range in the Internet namespace.
 #
 # @numeric: true if the host/port are guaranteed to be numeric,
 #           false if name resolution should be attempted. Defaults to false.
 #           (Since 2.9)
 #
-# @to: highest port to try
+# @to: If present, this is range of possible addresses, with port
+#      between @port and @to.
 #
 # @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6
 #
@@ -4072,9 +4083,8 @@
 # Since: 1.3
 ##
 { 'struct': 'InetSocketAddress',
+  'base': 'InetSocketAddressBase',
   'data': {
-    'host': 'str',
-    'port': 'str',
     '*numeric':  'bool',
     '*to': 'uint16',
     '*ipv4': 'bool',
@@ -4137,7 +4147,7 @@
 # Since: 2.9
 ##
 { 'enum': 'SocketAddressFlatType',
-  'data': [ 'unix', 'inet' ] }
+  'data': [ 'inet', 'unix', 'vsock', 'fd' ] }
 
 ##
 # @SocketAddressFlat:
@@ -4146,22 +4156,19 @@
 #
 # @type:       Transport type
 #
-# This is similar to SocketAddress, only distinction:
-#
-# 1. SocketAddressFlat is a flat union, SocketAddress is a simple union.
-#    A flat union is nicer than simple because it avoids nesting
-#    (i.e. more {}) on the wire.
-#
-# 2. SocketAddressFlat supports only types 'unix' and 'inet', because
-#    that's what its current users need.
+# This is just like SocketAddress, except it's a flat union rather
+# than a simple union.  Nicer because it avoids nesting on the wire,
+# i.e. this form has fewer {}.
 #
 # Since: 2.9
 ##
 { 'union': 'SocketAddressFlat',
   'base': { 'type': 'SocketAddressFlatType' },
   'discriminator': 'type',
-  'data': { 'unix': 'UnixSocketAddress',
-            'inet': 'InetSocketAddress' } }
+  'data': { 'inet': 'InetSocketAddress',
+            'unix': 'UnixSocketAddress',
+            'vsock': 'VsockSocketAddress',
+            'fd': 'String' } }
 
 ##
 # @getfd:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0f132fc995..87fb747ab6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2053,7 +2053,7 @@
 # @ignore:      Ignore the request
 # @unmap:       Forward as an unmap request
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'enum': 'BlockdevDiscardOptions',
   'data': [ 'ignore', 'unmap' ] }
@@ -2082,7 +2082,7 @@
 # @threads:     Use qemu's thread pool
 # @native:      Use native AIO backend (only Linux and Windows)
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'enum': 'BlockdevAioOptions',
   'data': [ 'threads', 'native' ] }
@@ -2097,7 +2097,7 @@
 # @no-flush:    ignore any flush requests for the device (default:
 #               false)
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevCacheOptions',
   'data': { '*direct': 'bool',
@@ -2108,18 +2108,9 @@
 #
 # Drivers that are supported in block device operations.
 #
-# @host_device: Since 2.1
-# @host_cdrom: Since 2.1
-# @gluster: Since 2.7
-# @nbd: Since 2.8
-# @nfs: Since 2.8
-# @replication: Since 2.8
-# @ssh: Since 2.8
-# @iscsi: Since 2.9
-# @rbd: Since 2.9
-# @sheepdog: Since 2.9
+# @vxhs: Since 2.10
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop',
@@ -2127,7 +2118,7 @@
             'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
             'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed',
             'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
-            'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+            'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
 
 ##
 # @BlockdevOptionsFile:
@@ -2137,7 +2128,7 @@
 # @filename:    path to the image file
 # @aio:         AIO backend (default: threads) (since: 2.8)
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsFile',
   'data': { 'filename': 'str',
@@ -2153,7 +2144,7 @@
 #              requests. Default to zero which completes requests immediately.
 #              (Since 2.4)
 #
-# Since: 2.2
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsNull',
   'data': { '*size': 'int', '*latency-ns': 'uint64' } }
@@ -2173,7 +2164,7 @@
 #               (since 2.4)
 # @rw:          whether to allow write operations (default: false)
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsVVFAT',
   'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool',
@@ -2187,7 +2178,7 @@
 #
 # @file:        reference to or definition of the data source block device
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsGenericFormat',
   'data': { 'file': 'BlockdevRef' } }
@@ -2201,7 +2192,7 @@
 #              the decryption key (since 2.6). Mandatory except when
 #              doing a metadata-only probe of the image.
 #
-# Since: 2.6
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsLUKS',
   'base': 'BlockdevOptionsGenericFormat',
@@ -2219,7 +2210,7 @@
 #               allowed to pass an empty string here in order to disable the
 #               default backing file.
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsGenericCOWFormat',
   'base': 'BlockdevOptionsGenericFormat',
@@ -2240,7 +2231,7 @@
 #
 # @all:         Perform all available overlap checks
 #
-# Since: 2.2
+# Since: 2.9
 ##
 { 'enum': 'Qcow2OverlapCheckMode',
   'data': [ 'none', 'constant', 'cached', 'all' ] }
@@ -2255,7 +2246,7 @@
 # @template: Specifies a template mode which can be adjusted using the other
 #            flags, defaults to 'cached'
 #
-# Since: 2.2
+# Since: 2.9
 ##
 { 'struct': 'Qcow2OverlapCheckFlags',
   'data': { '*template':       'Qcow2OverlapCheckMode',
@@ -2279,7 +2270,7 @@
 #
 # @mode:    named mode which chooses a specific set of flags
 #
-# Since: 2.2
+# Since: 2.9
 ##
 { 'alternate': 'Qcow2OverlapChecks',
   'data': { 'flags': 'Qcow2OverlapCheckFlags',
@@ -2320,7 +2311,7 @@
 #                         caches. The interval is in seconds. The default value
 #                         is 0 and it disables this feature (since 2.5)
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsQcow2',
   'base': 'BlockdevOptionsGenericCOWFormat',
@@ -2347,7 +2338,7 @@
 #
 # TODO: Expose the host_key_check option in QMP
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsSsh',
   'data': { 'server': 'InetSocketAddress',
@@ -2360,7 +2351,7 @@
 #
 # Trigger events supported by blkdebug.
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
   'data': [ 'l1_update', 'l1_grow_alloc_table', 'l1_grow_write_table',
@@ -2400,7 +2391,7 @@
 #
 # @immediately: fail immediately; defaults to false
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'struct': 'BlkdebugInjectErrorOptions',
   'data': { 'event': 'BlkdebugEvent',
@@ -2423,7 +2414,7 @@
 # @new_state:   the state identifier blkdebug is supposed to assume if
 #               this event is triggered
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'struct': 'BlkdebugSetStateOptions',
   'data': { 'event': 'BlkdebugEvent',
@@ -2446,7 +2437,7 @@
 #
 # @set-state:       array of state-change descriptions
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsBlkdebug',
   'data': { 'image': 'BlockdevRef',
@@ -2464,7 +2455,7 @@
 #
 # @raw:     raw image used for verification
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsBlkverify',
   'data': { 'test': 'BlockdevRef',
@@ -2479,7 +2470,7 @@
 #
 # @fifo: read only from the first child that has not failed
 #
-# Since: 2.2
+# Since: 2.9
 ##
 { 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] }
 
@@ -2501,7 +2492,7 @@
 # @read-pattern: choose read pattern and set to quorum by default
 #                (Since 2.2)
 #
-# Since: 2.0
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsQuorum',
   'data': { '*blkverify': 'bool',
@@ -2526,7 +2517,7 @@
 #
 # @logfile:     libgfapi log file (default /dev/stderr) (Since 2.8)
 #
-# Since: 2.7
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsGluster',
   'data': { 'volume': 'str',
@@ -2601,27 +2592,6 @@
 
 
 ##
-# @RbdAuthSupport:
-#
-# An enumeration of RBD auth support
-#
-# Since: 2.9
-##
-{ 'enum': 'RbdAuthSupport',
-  'data': [ 'cephx', 'none' ] }
-
-
-##
-# @RbdAuthMethod:
-#
-# An enumeration of rados auth_supported types
-#
-# Since: 2.9
-##
-{ 'struct': 'RbdAuthMethod',
-  'data': { 'auth': 'RbdAuthSupport' } }
-
-##
 # @BlockdevOptionsRbd:
 #
 # @pool:               Ceph pool name.
@@ -2639,11 +2609,6 @@
 # @server:             Monitor host address and port.  This maps
 #                      to the "mon_host" Ceph option.
 #
-# @auth-supported:     Authentication supported.
-#
-# @password-secret:    The ID of a QCryptoSecret object providing
-#                      the password for the login.
-#
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsRbd',
@@ -2652,9 +2617,7 @@
             '*conf': 'str',
             '*snapshot': 'str',
             '*user': 'str',
-            '*server': ['InetSocketAddress'],
-            '*auth-supported': ['RbdAuthMethod'],
-            '*password-secret': 'str' } }
+            '*server': ['InetSocketAddressBase'] } }
 
 ##
 # @BlockdevOptionsSheepdog:
@@ -2662,7 +2625,7 @@
 # Driver specific block device options for sheepdog
 #
 # @vdi:         Virtual disk image name
-# @addr:        The Sheepdog server to connect to
+# @server:      The Sheepdog server to connect to
 # @snap-id:     Snapshot ID
 # @tag:         Snapshot tag name
 #
@@ -2671,7 +2634,7 @@
 # Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsSheepdog',
-  'data': { 'addr': 'SocketAddressFlat',
+  'data': { 'server': 'SocketAddressFlat',
             'vdi': 'str',
             '*snap-id': 'uint32',
             '*tag': 'str' } }
@@ -2685,7 +2648,7 @@
 #
 # @secondary: Secondary mode, receive the vm's state from primary QEMU.
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
 
@@ -2700,7 +2663,7 @@
 #          node who owns the replication node chain. Must not be given in
 #          primary mode.
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsReplication',
   'base': 'BlockdevOptionsGenericFormat',
@@ -2714,7 +2677,7 @@
 #
 # @inet:        TCP transport
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'enum': 'NFSTransport',
   'data': [ 'inet' ] }
@@ -2728,7 +2691,7 @@
 #
 # @host:        host address for NFS server
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'NFSServer',
   'data': { 'type': 'NFSTransport',
@@ -2763,7 +2726,7 @@
 # @debug:                   set the NFS debug level (max 2) (defaults
 #                           to libnfs default)
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsNfs',
   'data': { 'server': 'NFSServer',
@@ -2776,16 +2739,101 @@
             '*debug': 'int' } }
 
 ##
-# @BlockdevOptionsCurl:
+# @BlockdevOptionsCurlBase:
 #
-# Driver specific block device options for the curl backend.
+# Driver specific block device options shared by all protocols supported by the
+# curl backend.
 #
-# @filename:    path to the image file
+# @url:                     URL of the image file
 #
-# Since: 1.7
+# @readahead:               Size of the read-ahead cache; must be a multiple of
+#                           512 (defaults to 256 kB)
+#
+# @timeout:                 Timeout for connections, in seconds (defaults to 5)
+#
+# @username:                Username for authentication (defaults to none)
+#
+# @password-secret:         ID of a QCryptoSecret object providing a password
+#                           for authentication (defaults to no password)
+#
+# @proxy-username:          Username for proxy authentication (defaults to none)
+#
+# @proxy-password-secret:   ID of a QCryptoSecret object providing a password
+#                           for proxy authentication (defaults to no password)
+#
+# Since: 2.9
 ##
-{ 'struct': 'BlockdevOptionsCurl',
-  'data': { 'filename': 'str' } }
+{ 'struct': 'BlockdevOptionsCurlBase',
+  'data': { 'url': 'str',
+            '*readahead': 'int',
+            '*timeout': 'int',
+            '*username': 'str',
+            '*password-secret': 'str',
+            '*proxy-username': 'str',
+            '*proxy-password-secret': 'str' } }
+
+##
+# @BlockdevOptionsCurlHttp:
+#
+# Driver specific block device options for HTTP connections over the curl
+# backend.  URLs must start with "http://".
+#
+# @cookie:      List of cookies to set; format is
+#               "name1=content1; name2=content2;" as explained by
+#               CURLOPT_COOKIE(3). Defaults to no cookies.
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlHttp',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*cookie': 'str' } }
+
+##
+# @BlockdevOptionsCurlHttps:
+#
+# Driver specific block device options for HTTPS connections over the curl
+# backend.  URLs must start with "https://".
+#
+# @cookie:      List of cookies to set; format is
+#               "name1=content1; name2=content2;" as explained by
+#               CURLOPT_COOKIE(3). Defaults to no cookies.
+#
+# @sslverify:   Whether to verify the SSL certificate's validity (defaults to
+#               true)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlHttps',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*cookie': 'str',
+            '*sslverify': 'bool' } }
+
+##
+# @BlockdevOptionsCurlFtp:
+#
+# Driver specific block device options for FTP connections over the curl
+# backend.  URLs must start with "ftp://".
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlFtp',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { } }
+
+##
+# @BlockdevOptionsCurlFtps:
+#
+# Driver specific block device options for FTPS connections over the curl
+# backend.  URLs must start with "ftps://".
+#
+# @sslverify:   Whether to verify the SSL certificate's validity (defaults to
+#               true)
+#
+# Since: 2.9
+##
+{ 'struct': 'BlockdevOptionsCurlFtps',
+  'base': 'BlockdevOptionsCurlBase',
+  'data': { '*sslverify': 'bool' } }
 
 ##
 # @BlockdevOptionsNbd:
@@ -2798,10 +2846,10 @@
 #
 # @tls-creds:   TLS credentials ID
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsNbd',
-  'data': { 'server': 'SocketAddress',
+  'data': { 'server': 'SocketAddressFlat',
             '*export': 'str',
             '*tls-creds': 'str' } }
 
@@ -2813,13 +2861,29 @@
 # @offset:      position where the block device starts
 # @size:        the assumed size of the device
 #
-# Since: 2.8
+# Since: 2.9
 ##
 { 'struct': 'BlockdevOptionsRaw',
   'base': 'BlockdevOptionsGenericFormat',
   'data': { '*offset': 'int', '*size': 'int' } }
 
 ##
+# @BlockdevOptionsVxHS:
+#
+# Driver specific block device options for VxHS
+#
+# @vdisk-id:    UUID of VxHS volume
+# @server:      vxhs server IP, port
+# @tls-creds:   TLS credentials ID
+#
+# Since: 2.10
+##
+{ 'struct': 'BlockdevOptionsVxHS',
+  'data': { 'vdisk-id': 'str',
+            'server': 'InetSocketAddressBase',
+            '*tls-creds': 'str' } }
+
+##
 # @BlockdevOptions:
 #
 # Options for creating a block device.  Many options are available for all
@@ -2837,7 +2901,7 @@
 #
 # Remaining options are determined by the block driver.
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'union': 'BlockdevOptions',
   'base': { 'driver': 'BlockdevDriver',
@@ -2854,13 +2918,13 @@
       'cloop':      'BlockdevOptionsGenericFormat',
       'dmg':        'BlockdevOptionsGenericFormat',
       'file':       'BlockdevOptionsFile',
-      'ftp':        'BlockdevOptionsCurl',
-      'ftps':       'BlockdevOptionsCurl',
+      'ftp':        'BlockdevOptionsCurlFtp',
+      'ftps':       'BlockdevOptionsCurlFtps',
       'gluster':    'BlockdevOptionsGluster',
       'host_cdrom': 'BlockdevOptionsFile',
       'host_device':'BlockdevOptionsFile',
-      'http':       'BlockdevOptionsCurl',
-      'https':      'BlockdevOptionsCurl',
+      'http':       'BlockdevOptionsCurlHttp',
+      'https':      'BlockdevOptionsCurlHttps',
       'iscsi':      'BlockdevOptionsIscsi',
       'luks':       'BlockdevOptionsLUKS',
       'nbd':        'BlockdevOptionsNbd',
@@ -2881,7 +2945,8 @@
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
-      'vvfat':      'BlockdevOptionsVVFAT'
+      'vvfat':      'BlockdevOptionsVVFAT',
+      'vxhs':       'BlockdevOptionsVxHS'
   } }
 
 ##
@@ -2894,7 +2959,7 @@
 #                   empty string means that no block device should be
 #                   referenced.
 #
-# Since: 1.7
+# Since: 2.9
 ##
 { 'alternate': 'BlockdevRef',
   'data': { 'definition': 'BlockdevOptions',
@@ -2907,11 +2972,7 @@
 # BlockBackend will be created; otherwise, @node-name is mandatory at the top
 # level and no BlockBackend will be created.
 #
-# Note: This command is still a work in progress.  It doesn't support all
-# block drivers among other things.  Stay away from it unless you want
-# to help with its development.
-#
-# Since: 1.7
+# Since: 2.9
 #
 # Example:
 #
@@ -2957,7 +3018,7 @@
 { 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true }
 
 ##
-# @x-blockdev-del:
+# @blockdev-del:
 #
 # Deletes a block device that has been added using blockdev-add.
 # The command will fail if the node is attached to a device or is
@@ -2965,11 +3026,7 @@
 #
 # @node-name: Name of the graph node to delete.
 #
-# Note: This command is still a work in progress and is considered
-# experimental. Stay away from it unless you want to help with its
-# development.
-#
-# Since: 2.5
+# Since: 2.9
 #
 # Example:
 #
@@ -2985,13 +3042,13 @@
 #    }
 # <- { "return": {} }
 #
-# -> { "execute": "x-blockdev-del",
+# -> { "execute": "blockdev-del",
 #      "arguments": { "node-name": "node0" }
 #    }
 # <- { "return": {} }
 #
 ##
-{ 'command': 'x-blockdev-del', 'data': { 'node-name': 'str' } }
+{ 'command': 'blockdev-del', 'data': { 'node-name': 'str' } }
 
 ##
 # @blockdev-open-tray:
diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c
index 026d25b767..324b197495 100644
--- a/qapi/opts-visitor.c
+++ b/qapi/opts-visitor.c
@@ -164,7 +164,7 @@ opts_check_struct(Visitor *v, Error **errp)
     GHashTableIter iter;
     GQueue *any;
 
-    if (ov->depth > 0) {
+    if (ov->depth > 1) {
         return;
     }
 
@@ -276,8 +276,8 @@ static void
 opts_check_list(Visitor *v, Error **errp)
 {
     /*
-     * FIXME should set error when unvisited elements remain.  Mostly
-     * harmless, as the generated visits always visit all elements.
+     * Unvisited list elements will be reported later when checking
+     * whether unvisited struct members remain.
      */
 }
 
diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
index 806b01ae3a..c089491c24 100644
--- a/qapi/string-input-visitor.c
+++ b/qapi/string-input-visitor.c
@@ -54,6 +54,10 @@ static int parse_str(StringInputVisitor *siv, const char *name, Error **errp)
         return 0;
     }
 
+    if (!*str) {
+        return 0;
+    }
+
     do {
         errno = 0;
         start = strtoll(str, &endptr, 0);
diff --git a/qdev-monitor.c b/qdev-monitor.c
index 5f2fcdfc45..e61d596ef7 100644
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -29,6 +29,7 @@
 #include "qemu/error-report.h"
 #include "qemu/help_option.h"
 #include "sysemu/block-backend.h"
+#include "migration/migration.h"
 
 /*
  * Aliases were a bad idea from the start.  Let's keep them
@@ -603,6 +604,11 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
         return NULL;
     }
 
+    if (!migration_is_idle()) {
+        error_setg(errp, "device_add not allowed while migrating");
+        return NULL;
+    }
+
     /* create device */
     dev = DEVICE(object_new(driver));
 
@@ -836,6 +842,45 @@ static DeviceState *find_device_state(const char *id, Error **errp)
     return DEVICE(obj);
 }
 
+void qdev_unplug(DeviceState *dev, Error **errp)
+{
+    DeviceClass *dc = DEVICE_GET_CLASS(dev);
+    HotplugHandler *hotplug_ctrl;
+    HotplugHandlerClass *hdc;
+
+    if (dev->parent_bus && !qbus_is_hotpluggable(dev->parent_bus)) {
+        error_setg(errp, QERR_BUS_NO_HOTPLUG, dev->parent_bus->name);
+        return;
+    }
+
+    if (!dc->hotpluggable) {
+        error_setg(errp, QERR_DEVICE_NO_HOTPLUG,
+                   object_get_typename(OBJECT(dev)));
+        return;
+    }
+
+    if (!migration_is_idle()) {
+        error_setg(errp, "device_del not allowed while migrating");
+        return;
+    }
+
+    qdev_hot_removed = true;
+
+    hotplug_ctrl = qdev_get_hotplug_handler(dev);
+    /* hotpluggable device MUST have HotplugHandler, if it doesn't
+     * then something is very wrong with it */
+    g_assert(hotplug_ctrl);
+
+    /* If device supports async unplug just request it to be done,
+     * otherwise just remove it synchronously */
+    hdc = HOTPLUG_HANDLER_GET_CLASS(hotplug_ctrl);
+    if (hdc->unplug_request) {
+        hotplug_handler_unplug_request(hotplug_ctrl, dev, errp);
+    } else {
+        hotplug_handler_unplug(hotplug_ctrl, dev, errp);
+    }
+}
+
 void qmp_device_del(const char *id, Error **errp)
 {
     DeviceState *dev = find_device_state(id, errp);
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 9c9702cc62..8ac78222af 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,9 +22,9 @@ STEXI
 ETEXI
 
 DEF("create", img_create,
-    "create [-q] [--object objectdef] [--image-opts] [-f fmt] [-o options] filename [size]")
+    "create [-q] [--object objectdef] [-f fmt] [-o options] filename [size]")
 STEXI
-@item create [--object @var{objectdef}] [--image-opts] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
+@item create [--object @var{objectdef}] [-q] [-f @var{fmt}] [-o @var{options}] @var{filename} [@var{size}]
 ETEXI
 
 DEF("commit", img_commit,
diff --git a/qemu-img.c b/qemu-img.c
index 98b836b030..bbe15741f1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -88,6 +88,16 @@ static void QEMU_NORETURN GCC_FMT_ATTR(1, 2) error_exit(const char *fmt, ...)
     exit(EXIT_FAILURE);
 }
 
+static void QEMU_NORETURN missing_argument(const char *option)
+{
+    error_exit("missing argument for option '%s'", option);
+}
+
+static void QEMU_NORETURN unrecognized_option(const char *option)
+{
+    error_exit("unrecognized option '%s'", option);
+}
+
 /* Please keep in synch with qemu-img.texi */
 static void QEMU_NORETURN help(void)
 {
@@ -406,13 +416,18 @@ static int img_create(int argc, char **argv)
             {"object", required_argument, 0, OPTION_OBJECT},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "F:b:f:he6o:q",
+        c = getopt_long(argc, argv, ":F:b:f:he6o:q",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -501,7 +516,7 @@ static int img_create(int argc, char **argv)
     }
 
     bdrv_img_create(filename, fmt, base_filename, base_fmt,
-                    options, img_size, 0, &local_err, quiet);
+                    options, img_size, 0, quiet, &local_err);
     if (local_err) {
         error_reportf_err(local_err, "%s: ", filename);
         goto fail;
@@ -651,13 +666,18 @@ static int img_check(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hf:r:T:q",
+        c = getopt_long(argc, argv, ":hf:r:T:q",
                         long_options, &option_index);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -855,13 +875,18 @@ static int img_commit(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "f:ht:b:dpq",
+        c = getopt_long(argc, argv, ":f:ht:b:dpq",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -959,7 +984,7 @@ static int img_commit(int argc, char **argv)
     aio_context_acquire(aio_context);
     commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0,
                         BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb,
-                        &cbi, &local_err, false);
+                        &cbi, false, &local_err);
     aio_context_release(aio_context);
     if (local_err) {
         goto done;
@@ -1190,13 +1215,18 @@ static int img_compare(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hf:F:T:pqs",
+        c = getopt_long(argc, argv, ":hf:F:T:pqs",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -1926,13 +1956,18 @@ static int img_convert(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
+        c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:W",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -2502,13 +2537,18 @@ static int img_info(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "f:h",
+        c = getopt_long(argc, argv, ":f:h",
                         long_options, &option_index);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -2713,13 +2753,18 @@ static int img_map(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "f:h",
+        c = getopt_long(argc, argv, ":f:h",
                         long_options, &option_index);
         if (c == -1) {
             break;
         }
         switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -2835,13 +2880,18 @@ static int img_snapshot(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "la:c:d:hq",
+        c = getopt_long(argc, argv, ":la:c:d:hq",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             return 0;
@@ -2988,13 +3038,18 @@ static int img_rebase(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hf:F:b:upt:T:q",
+        c = getopt_long(argc, argv, ":hf:F:b:upt:T:q",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             return 0;
@@ -3355,13 +3410,18 @@ static int img_resize(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "f:hq",
+        c = getopt_long(argc, argv, ":f:hq",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
         switch(c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -3493,54 +3553,59 @@ static int img_amend(int argc, char **argv)
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "ho:f:t:pq",
+        c = getopt_long(argc, argv, ":ho:f:t:pq",
                         long_options, NULL);
         if (c == -1) {
             break;
         }
 
         switch (c) {
-            case 'h':
-            case '?':
-                help();
-                break;
-            case 'o':
-                if (!is_valid_option_list(optarg)) {
-                    error_report("Invalid option list: %s", optarg);
-                    ret = -1;
-                    goto out_no_progress;
-                }
-                if (!options) {
-                    options = g_strdup(optarg);
-                } else {
-                    char *old_options = options;
-                    options = g_strdup_printf("%s,%s", options, optarg);
-                    g_free(old_options);
-                }
-                break;
-            case 'f':
-                fmt = optarg;
-                break;
-            case 't':
-                cache = optarg;
-                break;
-            case 'p':
-                progress = true;
-                break;
-            case 'q':
-                quiet = true;
-                break;
-            case OPTION_OBJECT:
-                opts = qemu_opts_parse_noisily(&qemu_object_opts,
-                                               optarg, true);
-                if (!opts) {
-                    ret = -1;
-                    goto out_no_progress;
-                }
-                break;
-            case OPTION_IMAGE_OPTS:
-                image_opts = true;
-                break;
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
+            help();
+            break;
+        case 'o':
+            if (!is_valid_option_list(optarg)) {
+                error_report("Invalid option list: %s", optarg);
+                ret = -1;
+                goto out_no_progress;
+            }
+            if (!options) {
+                options = g_strdup(optarg);
+            } else {
+                char *old_options = options;
+                options = g_strdup_printf("%s,%s", options, optarg);
+                g_free(old_options);
+            }
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 't':
+            cache = optarg;
+            break;
+        case 'p':
+            progress = true;
+            break;
+        case 'q':
+            quiet = true;
+            break;
+        case OPTION_OBJECT:
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                ret = -1;
+                goto out_no_progress;
+            }
+            break;
+        case OPTION_IMAGE_OPTS:
+            image_opts = true;
+            break;
         }
     }
 
@@ -3759,14 +3824,19 @@ static int img_bench(int argc, char **argv)
             {"no-drain", no_argument, 0, OPTION_NO_DRAIN},
             {0, 0, 0, 0}
         };
-        c = getopt_long(argc, argv, "hc:d:f:no:qs:S:t:w", long_options, NULL);
+        c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:w", long_options, NULL);
         if (c == -1) {
             break;
         }
 
         switch (c) {
-        case 'h':
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
+            unrecognized_option(argv[optind - 1]);
+            break;
+        case 'h':
             help();
             break;
         case 'c':
@@ -4093,7 +4163,7 @@ static int img_dd(int argc, char **argv)
         { 0, 0, 0, 0 }
     };
 
-    while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) {
+    while ((c = getopt_long(argc, argv, ":hf:O:", long_options, NULL))) {
         if (c == EOF) {
             break;
         }
@@ -4104,10 +4174,12 @@ static int img_dd(int argc, char **argv)
         case 'f':
             fmt = optarg;
             break;
+        case ':':
+            missing_argument(argv[optind - 1]);
+            break;
         case '?':
-            error_report("Try 'qemu-img --help' for more information.");
-            ret = -1;
-            goto out;
+            unrecognized_option(argv[optind - 1]);
+            break;
         case 'h':
             help();
             break;
@@ -4336,8 +4408,14 @@ int main(int argc, char **argv)
     qemu_add_opts(&qemu_source_opts);
     qemu_add_opts(&qemu_trace_opts);
 
-    while ((c = getopt_long(argc, argv, "+hVT:", long_options, NULL)) != -1) {
+    while ((c = getopt_long(argc, argv, "+:hVT:", long_options, NULL)) != -1) {
         switch (c) {
+        case ':':
+            missing_argument(argv[optind - 1]);
+            return 0;
+        case '?':
+            unrecognized_option(argv[optind - 1]);
+            return 0;
         case 'h':
             help();
             return 0;
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 2c48f9ce1a..312fc6d157 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -35,6 +35,13 @@ static int compare_cmdname(const void *a, const void *b)
 
 void qemuio_add_command(const cmdinfo_t *ci)
 {
+    /* ci->perm assumes a file is open, but the GLOBAL and NOFILE_OK
+     * flags allow it not to be, so that combination is invalid.
+     * Catch it now rather than letting it manifest as a crash if a
+     * particular set of command line options are used.
+     */
+    assert(ci->perm == 0 ||
+           (ci->flags & (CMD_FLAG_GLOBAL | CMD_NOFILE_OK)) == 0);
     cmdtab = g_renew(cmdinfo_t, cmdtab, ++ncmds);
     cmdtab[ncmds - 1] = *ci;
     qsort(cmdtab, ncmds, sizeof(*cmdtab), compare_cmdname);
@@ -514,7 +521,7 @@ static int do_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
     }
 
     co = qemu_coroutine_create(co_pwrite_zeroes_entry, &data);
-    qemu_coroutine_enter(co);
+    bdrv_coroutine_enter(blk_bs(blk), co);
     while (!data.done) {
         aio_poll(blk_get_aio_context(blk), true);
     }
diff --git a/qemu-options.hx b/qemu-options.hx
index 2043371260..b9a2463919 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -635,6 +635,30 @@ file sectors into the image file.
 conversion of plain zero writes by the OS to driver specific optimized
 zero write commands. You may even choose "unmap" if @var{discard} is set
 to "unmap" to allow a zero write to be converted to an UNMAP operation.
+@item bps=@var{b},bps_rd=@var{r},bps_wr=@var{w}
+Specify bandwidth throttling limits in bytes per second, either for all request
+types or for reads or writes only.  Small values can lead to timeouts or hangs
+inside the guest.  A safe minimum for disks is 2 MB/s.
+@item bps_max=@var{bm},bps_rd_max=@var{rm},bps_wr_max=@var{wm}
+Specify bursts in bytes per second, either for all request types or for reads
+or writes only.  Bursts allow the guest I/O to spike above the limit
+temporarily.
+@item iops=@var{i},iops_rd=@var{r},iops_wr=@var{w}
+Specify request rate limits in requests per second, either for all request
+types or for reads or writes only.
+@item iops_max=@var{bm},iops_rd_max=@var{rm},iops_wr_max=@var{wm}
+Specify bursts in requests per second, either for all request types or for reads
+or writes only.  Bursts allow the guest I/O to spike above the limit
+temporarily.
+@item iops_size=@var{is}
+Let every @var{is} bytes of a request count as a new request for iops
+throttling purposes.  Use this option to prevent guests from circumventing iops
+limits by sending fewer but larger requests.
+@item group=@var{g}
+Join a throttling quota group with given name @var{g}.  All drives that are
+members of the same group are accounted for together.  Use this option to
+prevent guests from circumventing throttling limits by using many small disks
+instead of a single larger disk.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 73d93eb5ce..915df9ed90 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -999,7 +999,9 @@ static void build_guest_fsinfo_for_virtual_device(char const *syspath,
     dirpath = g_strdup_printf("%s/slaves", syspath);
     dir = opendir(dirpath);
     if (!dir) {
-        error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
+        if (errno != ENOENT) {
+            error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
+        }
         g_free(dirpath);
         return;
     }
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 19d72b2411..04026eedbf 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -768,7 +768,7 @@ int64_t qmp_guest_fsfreeze_freeze(Error **errp)
     /* cannot risk guest agent blocking itself on a write in this state */
     ga_set_frozen(ga_state);
 
-    qga_vss_fsfreeze(&i, &local_err, true);
+    qga_vss_fsfreeze(&i, true, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         goto error;
@@ -807,7 +807,7 @@ int64_t qmp_guest_fsfreeze_thaw(Error **errp)
         return 0;
     }
 
-    qga_vss_fsfreeze(&i, errp, false);
+    qga_vss_fsfreeze(&i, false, errp);
 
     ga_unset_frozen(ga_state);
     return i;
diff --git a/qga/vss-win32.c b/qga/vss-win32.c
index 9a0e46356a..a80933c98b 100644
--- a/qga/vss-win32.c
+++ b/qga/vss-win32.c
@@ -145,7 +145,7 @@ void ga_uninstall_vss_provider(void)
 }
 
 /* Call VSS requester and freeze/thaw filesystems and applications */
-void qga_vss_fsfreeze(int *nr_volume, Error **errp, bool freeze)
+void qga_vss_fsfreeze(int *nr_volume, bool freeze, Error **errp)
 {
     const char *func_name = freeze ? "requester_freeze" : "requester_thaw";
     QGAVSSRequesterFunc func;
diff --git a/qga/vss-win32.h b/qga/vss-win32.h
index 4d1d15081e..51d303a8f6 100644
--- a/qga/vss-win32.h
+++ b/qga/vss-win32.h
@@ -21,6 +21,6 @@ bool vss_initialized(void);
 int ga_install_vss_provider(void);
 void ga_uninstall_vss_provider(void);
 
-void qga_vss_fsfreeze(int *nr_volume, Error **errp, bool freeze);
+void qga_vss_fsfreeze(int *nr_volume, bool freeze, Error **errp);
 
 #endif
diff --git a/qmp.c b/qmp.c
index fa82b598c6..ab74cd729d 100644
--- a/qmp.c
+++ b/qmp.c
@@ -207,6 +207,12 @@ void qmp_cont(Error **errp)
         }
     }
 
+    blk_resume_after_migration(&local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
     if (runstate_check(RUN_STATE_INMIGRATE)) {
         autostart = 1;
     } else {
@@ -542,11 +548,6 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
         return NULL;
     }
 
-    if (DEVICE_CLASS(klass)->cannot_destroy_with_object_finalize_yet) {
-        error_setg(errp, "Can't list properties of device '%s'", typename);
-        return NULL;
-    }
-
     obj = object_new(typename);
 
     object_property_iter_init(&iter, obj);
diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index 03a95c3276..d4253a88de 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -114,7 +114,7 @@ Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
     QDict *pdict;
     Object *obj;
     const char *id = qemu_opts_id(opts);
-    const char *type = qemu_opt_get(opts, "qom-type");
+    char *type = qemu_opt_get_del(opts, "qom-type");
 
     if (!type) {
         error_setg(errp, QERR_MISSING_PARAMETER, "qom-type");
@@ -122,17 +122,21 @@ Object *user_creatable_add_opts(QemuOpts *opts, Error **errp)
     }
     if (!id) {
         error_setg(errp, QERR_MISSING_PARAMETER, "id");
+        qemu_opt_set(opts, "qom-type", type, &error_abort);
+        g_free(type);
         return NULL;
     }
 
+    qemu_opts_set_id(opts, NULL);
     pdict = qemu_opts_to_qdict(opts, NULL);
-    qdict_del(pdict, "qom-type");
-    qdict_del(pdict, "id");
 
     v = opts_visitor_new(opts);
     obj = user_creatable_add_type(type, id, pdict, v, errp);
     visit_free(v);
 
+    qemu_opts_set_id(opts, (char *) id);
+    qemu_opt_set(opts, "qom-type", type, &error_abort);
+    g_free(type);
     QDECREF(pdict);
     return obj;
 }
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index bea7b4aa6b..fca8514012 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -195,6 +195,10 @@ void replay_save_instructions(void)
     if (replay_file && replay_mode == REPLAY_MODE_RECORD) {
         replay_mutex_lock();
         int diff = (int)(replay_get_current_step() - replay_state.current_step);
+
+        /* Time can only go forward */
+        assert(diff >= 0);
+
         if (diff > 0) {
             replay_put_event(EVENT_INSTRUCTION);
             replay_put_dword(diff);
diff --git a/replay/replay.c b/replay/replay.c
index 78e2a7e570..f810628cac 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -22,7 +22,7 @@
 
 /* Current version of the replay mechanism.
    Increase it when file format changes. */
-#define REPLAY_VERSION              0xe02005
+#define REPLAY_VERSION              0xe02006
 /* Size of replay log header */
 #define HEADER_SIZE                 (sizeof(uint32_t) + sizeof(uint64_t))
 
@@ -84,6 +84,10 @@ void replay_account_executed_instructions(void)
         if (replay_state.instructions_count > 0) {
             int count = (int)(replay_get_current_step()
                               - replay_state.current_step);
+
+            /* Time can only go forward */
+            assert(count >= 0);
+
             replay_state.instructions_count -= count;
             replay_state.current_step += count;
             if (replay_state.instructions_count == 0) {
diff --git a/roms/openbios b/roms/openbios
-Subproject f233c3f72cfa79c1123a7ccef08d2f7e228da6d
+Subproject 04898e8ce4c2f7bd94c7eeff9d26f2ff23aae8d
diff --git a/scripts/qapi.py b/scripts/qapi.py
index e88c047c2e..6c4d554165 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -106,7 +106,6 @@ class QAPIDoc(object):
             self.name = name
             # the list of lines for this section
             self.content = []
-            self.optional = False
 
         def append(self, line):
             self.content.append(line)
diff --git a/scripts/qapi2texi.py b/scripts/qapi2texi.py
index 8eed11a60c..9e015002ef 100755
--- a/scripts/qapi2texi.py
+++ b/scripts/qapi2texi.py
@@ -35,12 +35,12 @@ EXAMPLE_FMT = """@example
 
 def subst_strong(doc):
     """Replaces *foo* by @strong{foo}"""
-    return re.sub(r'\*([^*\n]+)\*', r'@emph{\1}', doc)
+    return re.sub(r'\*([^*\n]+)\*', r'@strong{\1}', doc)
 
 
 def subst_emph(doc):
     """Replaces _foo_ by @emph{foo}"""
-    return re.sub(r'\b_([^_\n]+)_\b', r' @emph{\1} ', doc)
+    return re.sub(r'\b_([^_\n]+)_\b', r'@emph{\1}', doc)
 
 
 def subst_vars(doc):
@@ -292,6 +292,7 @@ def main(argv):
     if not qapi.doc_required:
         print >>sys.stderr, ("%s: need pragma 'doc-required' "
                              "to generate documentation" % argv[0])
+        sys.exit(1)
     print texi_schema(schema)
 
 
diff --git a/scripts/qemugdb/mtree.py b/scripts/qemugdb/mtree.py
index cc8131c2e7..e6791b7885 100644
--- a/scripts/qemugdb/mtree.py
+++ b/scripts/qemugdb/mtree.py
@@ -21,7 +21,15 @@ def isnull(ptr):
     return ptr == gdb.Value(0).cast(ptr.type)
 
 def int128(p):
-    return int(p['lo']) + (int(p['hi']) << 64)
+    '''Read an Int128 type to a python integer.
+
+    QEMU can be built with native Int128 support so we need to detect
+    if the value is a structure or the native type.
+    '''
+    if p.type.code == gdb.TYPE_CODE_STRUCT:
+        return int(p['lo']) + (int(p['hi']) << 64)
+    else:
+        return int(("%s" % p), 16)
 
 class MtreeCommand(gdb.Command):
     '''Display the memory tree hierarchy'''
@@ -69,7 +77,7 @@ class MtreeCommand(gdb.Command):
             gdb.write('%s    alias: %s@%016x (@ %s)\n' %
                       ('  ' * level,
                        alias['name'].string(),
-                       ptr['alias_offset'],
+                       int(ptr['alias_offset']),
                        alias,
                        ),
                       gdb.STDOUT)
diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py
index 4c990047b6..d60b3a08f7 100755
--- a/scripts/simpletrace.py
+++ b/scripts/simpletrace.py
@@ -116,7 +116,28 @@ class Analyzer(object):
     is invoked.
 
     If a method matching a trace event name exists, it is invoked to process
-    that trace record.  Otherwise the catchall() method is invoked."""
+    that trace record.  Otherwise the catchall() method is invoked.
+
+    Example:
+    The following method handles the runstate_set(int new_state) trace event::
+
+      def runstate_set(self, new_state):
+          ...
+
+    The method can also take a timestamp argument before the trace event
+    arguments::
+
+      def runstate_set(self, timestamp, new_state):
+          ...
+
+    Timestamps have the uint64_t type and are in nanoseconds.
+
+    The pid can be included in addition to the timestamp and is useful when
+    dealing with traces from multiple processes::
+
+      def runstate_set(self, timestamp, pid, new_state):
+          ...
+    """
 
     def begin(self):
         """Called at the start of the trace."""
diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py
index 7ddc4a52ce..db55f52eb5 100644
--- a/scripts/tracetool/format/tcg_h.py
+++ b/scripts/tracetool/format/tcg_h.py
@@ -40,6 +40,7 @@ def generate(events, backend, group):
         '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(),
         '',
         '#include "exec/helper-proto.h"',
+        '#include "%s"' % header,
         '',
         )
 
diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py
index 7dccd8c5ec..ec7acbe347 100644
--- a/scripts/tracetool/format/tcg_helper_c.py
+++ b/scripts/tracetool/format/tcg_helper_c.py
@@ -55,6 +55,7 @@ def generate(events, backend, group):
         '#include "qemu-common.h"',
         '#include "cpu.h"',
         '#include "exec/helper-proto.h"',
+        '#include "%s"' % header,
         '',
         )
 
diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs
index 1baa1f1c7c..28049b03cd 100644
--- a/slirp/Makefile.objs
+++ b/slirp/Makefile.objs
@@ -2,4 +2,4 @@ common-obj-y = cksum.o if.o ip_icmp.o ip6_icmp.o ip6_input.o ip6_output.o \
                ip_input.o ip_output.o dnssearch.o dhcpv6.o
 common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o
 common-obj-y += tcp_subr.o tcp_timer.o udp.o udp6.o bootp.o tftp.o arp_table.o \
-                ndp_table.o
+                ndp_table.o ncsi.o
diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c
index 298a48dd25..777eb574be 100644
--- a/slirp/ip6_icmp.c
+++ b/slirp/ip6_icmp.c
@@ -143,17 +143,13 @@ void ndp_send_ra(Slirp *slirp)
     /* Build IPv6 packet */
     struct mbuf *t = m_get(slirp);
     struct ip6 *rip = mtod(t, struct ip6 *);
+    size_t pl_size = 0;
+    struct in6_addr addr;
+    uint32_t scope_id;
+
     rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR;
     rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST;
     rip->ip_nh = IPPROTO_ICMPV6;
-    rip->ip_pl = htons(ICMP6_NDP_RA_MINLEN
-                        + NDPOPT_LINKLAYER_LEN
-                        + NDPOPT_PREFIXINFO_LEN
-#ifndef _WIN32
-                        + NDPOPT_RDNSS_LEN
-#endif
-                        );
-    t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl);
 
     /* Build ICMPv6 packet */
     t->m_data += sizeof(struct ip6);
@@ -171,6 +167,7 @@ void ndp_send_ra(Slirp *slirp)
     ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime);
     ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime);
     t->m_data += ICMP6_NDP_RA_MINLEN;
+    pl_size += ICMP6_NDP_RA_MINLEN;
 
     /* Source link-layer address (NDP option) */
     struct ndpopt *opt = mtod(t, struct ndpopt *);
@@ -178,6 +175,7 @@ void ndp_send_ra(Slirp *slirp)
     opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8;
     in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer);
     t->m_data += NDPOPT_LINKLAYER_LEN;
+    pl_size += NDPOPT_LINKLAYER_LEN;
 
     /* Prefix information (NDP option) */
     struct ndpopt *opt2 = mtod(t, struct ndpopt *);
@@ -192,27 +190,26 @@ void ndp_send_ra(Slirp *slirp)
     opt2->ndpopt_prefixinfo.reserved2 = 0;
     opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6;
     t->m_data += NDPOPT_PREFIXINFO_LEN;
+    pl_size += NDPOPT_PREFIXINFO_LEN;
 
-#ifndef _WIN32
     /* Prefix information (NDP option) */
-    /* disabled for windows for now, until get_dns6_addr is implemented */
-    struct ndpopt *opt3 = mtod(t, struct ndpopt *);
-    opt3->ndpopt_type = NDPOPT_RDNSS;
-    opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8;
-    opt3->ndpopt_rdnss.reserved = 0;
-    opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval);
-    opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6;
-    t->m_data += NDPOPT_RDNSS_LEN;
-#endif
+    if (get_dns6_addr(&addr, &scope_id) >= 0) {
+        /* Host system does have an IPv6 DNS server, announce our proxy.  */
+        struct ndpopt *opt3 = mtod(t, struct ndpopt *);
+        opt3->ndpopt_type = NDPOPT_RDNSS;
+        opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8;
+        opt3->ndpopt_rdnss.reserved = 0;
+        opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval);
+        opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6;
+        t->m_data += NDPOPT_RDNSS_LEN;
+        pl_size += NDPOPT_RDNSS_LEN;
+    }
+
+    rip->ip_pl = htons(pl_size);
+    t->m_data -= sizeof(struct ip6) + pl_size;
+    t->m_len = sizeof(struct ip6) + pl_size;
 
     /* ICMPv6 Checksum */
-#ifndef _WIN32
-    t->m_data -= NDPOPT_RDNSS_LEN;
-#endif
-    t->m_data -= NDPOPT_PREFIXINFO_LEN;
-    t->m_data -= NDPOPT_LINKLAYER_LEN;
-    t->m_data -= ICMP6_NDP_RA_MINLEN;
-    t->m_data -= sizeof(struct ip6);
     ricmp->icmp6_cksum = ip6_cksum(t);
 
     ip6_output(NULL, t, 0);
diff --git a/slirp/ncsi-pkt.h b/slirp/ncsi-pkt.h
new file mode 100644
index 0000000000..ea07d1cd0f
--- /dev/null
+++ b/slirp/ncsi-pkt.h
@@ -0,0 +1,419 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef NCSI_PKT_H
+#define NCSI_PKT_H
+
+/* from linux/net/ncsi/ncsi-pkt.h */
+#define __be32 uint32_t
+#define __be16 uint16_t
+
+struct ncsi_pkt_hdr {
+        unsigned char mc_id;        /* Management controller ID */
+        unsigned char revision;     /* NCSI version - 0x01      */
+        unsigned char reserved;     /* Reserved                 */
+        unsigned char id;           /* Packet sequence number   */
+        unsigned char type;         /* Packet type              */
+        unsigned char channel;      /* Network controller ID    */
+        __be16        length;       /* Payload length           */
+        __be32        reserved1[2]; /* Reserved                 */
+};
+
+struct ncsi_cmd_pkt_hdr {
+        struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+};
+
+struct ncsi_rsp_pkt_hdr {
+        struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+        __be16              code;   /* Response code             */
+        __be16              reason; /* Response reason           */
+};
+
+struct ncsi_aen_pkt_hdr {
+        struct ncsi_pkt_hdr common;       /* Common NCSI packet header */
+        unsigned char       reserved2[3]; /* Reserved                  */
+        unsigned char       type;         /* AEN packet type           */
+};
+
+/* NCSI common command packet */
+struct ncsi_cmd_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header */
+        __be32                  checksum; /* Checksum       */
+        unsigned char           pad[26];
+};
+
+struct ncsi_rsp_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;      /* Response header */
+        __be32                  checksum; /* Checksum        */
+        unsigned char           pad[22];
+};
+
+/* Select Package */
+struct ncsi_cmd_sp_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;            /* Command header */
+        unsigned char           reserved[3];    /* Reserved       */
+        unsigned char           hw_arbitration; /* HW arbitration */
+        __be32                  checksum;       /* Checksum       */
+        unsigned char           pad[22];
+};
+
+/* Disable Channel */
+struct ncsi_cmd_dc_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;         /* Command header  */
+        unsigned char           reserved[3]; /* Reserved        */
+        unsigned char           ald;         /* Allow link down */
+        __be32                  checksum;    /* Checksum        */
+        unsigned char           pad[22];
+};
+
+/* Reset Channel */
+struct ncsi_cmd_rc_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header */
+        __be32                  reserved; /* Reserved       */
+        __be32                  checksum; /* Checksum       */
+        unsigned char           pad[22];
+};
+
+/* AEN Enable */
+struct ncsi_cmd_ae_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;         /* Command header   */
+        unsigned char           reserved[3]; /* Reserved         */
+        unsigned char           mc_id;       /* MC ID            */
+        __be32                  mode;        /* AEN working mode */
+        __be32                  checksum;    /* Checksum         */
+        unsigned char           pad[18];
+};
+
+/* Set Link */
+struct ncsi_cmd_sl_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header    */
+        __be32                  mode;     /* Link working mode */
+        __be32                  oem_mode; /* OEM link mode     */
+        __be32                  checksum; /* Checksum          */
+        unsigned char           pad[18];
+};
+
+/* Set VLAN Filter */
+struct ncsi_cmd_svf_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;       /* Command header    */
+        __be16                  reserved;  /* Reserved          */
+        __be16                  vlan;      /* VLAN ID           */
+        __be16                  reserved1; /* Reserved          */
+        unsigned char           index;     /* VLAN table index  */
+        unsigned char           enable;    /* Enable or disable */
+        __be32                  checksum;  /* Checksum          */
+        unsigned char           pad[14];
+};
+
+/* Enable VLAN */
+struct ncsi_cmd_ev_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;         /* Command header   */
+        unsigned char           reserved[3]; /* Reserved         */
+        unsigned char           mode;        /* VLAN filter mode */
+        __be32                  checksum;    /* Checksum         */
+        unsigned char           pad[22];
+};
+
+/* Set MAC Address */
+struct ncsi_cmd_sma_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header          */
+        unsigned char           mac[6];   /* MAC address             */
+        unsigned char           index;    /* MAC table index         */
+        unsigned char           at_e;     /* Addr type and operation */
+        __be32                  checksum; /* Checksum                */
+        unsigned char           pad[18];
+};
+
+/* Enable Broadcast Filter */
+struct ncsi_cmd_ebf_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header */
+        __be32                  mode;     /* Filter mode    */
+        __be32                  checksum; /* Checksum       */
+        unsigned char           pad[22];
+};
+
+/* Enable Global Multicast Filter */
+struct ncsi_cmd_egmf_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;      /* Command header */
+        __be32                  mode;     /* Global MC mode */
+        __be32                  checksum; /* Checksum       */
+        unsigned char           pad[22];
+};
+
+/* Set NCSI Flow Control */
+struct ncsi_cmd_snfc_pkt {
+        struct ncsi_cmd_pkt_hdr cmd;         /* Command header    */
+        unsigned char           reserved[3]; /* Reserved          */
+        unsigned char           mode;        /* Flow control mode */
+        __be32                  checksum;    /* Checksum          */
+        unsigned char           pad[22];
+};
+
+/* Get Link Status */
+struct ncsi_rsp_gls_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;        /* Response header   */
+        __be32                  status;     /* Link status       */
+        __be32                  other;      /* Other indications */
+        __be32                  oem_status; /* OEM link status   */
+        __be32                  checksum;
+        unsigned char           pad[10];
+};
+
+/* Get Version ID */
+struct ncsi_rsp_gvi_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;          /* Response header */
+        __be32                  ncsi_version; /* NCSI version    */
+        unsigned char           reserved[3];  /* Reserved        */
+        unsigned char           alpha2;       /* NCSI version    */
+        unsigned char           fw_name[12];  /* f/w name string */
+        __be32                  fw_version;   /* f/w version     */
+        __be16                  pci_ids[4];   /* PCI IDs         */
+        __be32                  mf_id;        /* Manufacture ID  */
+        __be32                  checksum;
+};
+
+/* Get Capabilities */
+struct ncsi_rsp_gc_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;         /* Response header   */
+        __be32                  cap;         /* Capabilities      */
+        __be32                  bc_cap;      /* Broadcast cap     */
+        __be32                  mc_cap;      /* Multicast cap     */
+        __be32                  buf_cap;     /* Buffering cap     */
+        __be32                  aen_cap;     /* AEN cap           */
+        unsigned char           vlan_cnt;    /* VLAN filter count */
+        unsigned char           mixed_cnt;   /* Mix filter count  */
+        unsigned char           mc_cnt;      /* MC filter count   */
+        unsigned char           uc_cnt;      /* UC filter count   */
+        unsigned char           reserved[2]; /* Reserved          */
+        unsigned char           vlan_mode;   /* VLAN mode         */
+        unsigned char           channel_cnt; /* Channel count     */
+        __be32                  checksum;    /* Checksum          */
+};
+
+/* Get Parameters */
+struct ncsi_rsp_gp_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;          /* Response header       */
+        unsigned char           mac_cnt;      /* Number of MAC addr    */
+        unsigned char           reserved[2];  /* Reserved              */
+        unsigned char           mac_enable;   /* MAC addr enable flags */
+        unsigned char           vlan_cnt;     /* VLAN tag count        */
+        unsigned char           reserved1;    /* Reserved              */
+        __be16                  vlan_enable;  /* VLAN tag enable flags */
+        __be32                  link_mode;    /* Link setting          */
+        __be32                  bc_mode;      /* BC filter mode        */
+        __be32                  valid_modes;  /* Valid mode parameters */
+        unsigned char           vlan_mode;    /* VLAN mode             */
+        unsigned char           fc_mode;      /* Flow control mode     */
+        unsigned char           reserved2[2]; /* Reserved              */
+        __be32                  aen_mode;     /* AEN mode              */
+        unsigned char           mac[6];       /* Supported MAC addr    */
+        __be16                  vlan;         /* Supported VLAN tags   */
+        __be32                  checksum;     /* Checksum              */
+};
+
+/* Get Controller Packet Statistics */
+struct ncsi_rsp_gcps_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;            /* Response header            */
+        __be32                  cnt_hi;         /* Counter cleared            */
+        __be32                  cnt_lo;         /* Counter cleared            */
+        __be32                  rx_bytes;       /* Rx bytes                   */
+        __be32                  tx_bytes;       /* Tx bytes                   */
+        __be32                  rx_uc_pkts;     /* Rx UC packets              */
+        __be32                  rx_mc_pkts;     /* Rx MC packets              */
+        __be32                  rx_bc_pkts;     /* Rx BC packets              */
+        __be32                  tx_uc_pkts;     /* Tx UC packets              */
+        __be32                  tx_mc_pkts;     /* Tx MC packets              */
+        __be32                  tx_bc_pkts;     /* Tx BC packets              */
+        __be32                  fcs_err;        /* FCS errors                 */
+        __be32                  align_err;      /* Alignment errors           */
+        __be32                  false_carrier;  /* False carrier detection    */
+        __be32                  runt_pkts;      /* Rx runt packets            */
+        __be32                  jabber_pkts;    /* Rx jabber packets          */
+        __be32                  rx_pause_xon;   /* Rx pause XON frames        */
+        __be32                  rx_pause_xoff;  /* Rx XOFF frames             */
+        __be32                  tx_pause_xon;   /* Tx XON frames              */
+        __be32                  tx_pause_xoff;  /* Tx XOFF frames             */
+        __be32                  tx_s_collision; /* Single collision frames    */
+        __be32                  tx_m_collision; /* Multiple collision frames  */
+        __be32                  l_collision;    /* Late collision frames      */
+        __be32                  e_collision;    /* Excessive collision frames */
+        __be32                  rx_ctl_frames;  /* Rx control frames          */
+        __be32                  rx_64_frames;   /* Rx 64-bytes frames         */
+        __be32                  rx_127_frames;  /* Rx 65-127 bytes frames     */
+        __be32                  rx_255_frames;  /* Rx 128-255 bytes frames    */
+        __be32                  rx_511_frames;  /* Rx 256-511 bytes frames    */
+        __be32                  rx_1023_frames; /* Rx 512-1023 bytes frames   */
+        __be32                  rx_1522_frames; /* Rx 1024-1522 bytes frames  */
+        __be32                  rx_9022_frames; /* Rx 1523-9022 bytes frames  */
+        __be32                  tx_64_frames;   /* Tx 64-bytes frames         */
+        __be32                  tx_127_frames;  /* Tx 65-127 bytes frames     */
+        __be32                  tx_255_frames;  /* Tx 128-255 bytes frames    */
+        __be32                  tx_511_frames;  /* Tx 256-511 bytes frames    */
+        __be32                  tx_1023_frames; /* Tx 512-1023 bytes frames   */
+        __be32                  tx_1522_frames; /* Tx 1024-1522 bytes frames  */
+        __be32                  tx_9022_frames; /* Tx 1523-9022 bytes frames  */
+        __be32                  rx_valid_bytes; /* Rx valid bytes             */
+        __be32                  rx_runt_pkts;   /* Rx error runt packets      */
+        __be32                  rx_jabber_pkts; /* Rx error jabber packets    */
+        __be32                  checksum;       /* Checksum                   */
+};
+
+/* Get NCSI Statistics */
+struct ncsi_rsp_gns_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;           /* Response header         */
+        __be32                  rx_cmds;       /* Rx NCSI commands        */
+        __be32                  dropped_cmds;  /* Dropped commands        */
+        __be32                  cmd_type_errs; /* Command type errors     */
+        __be32                  cmd_csum_errs; /* Command checksum errors */
+        __be32                  rx_pkts;       /* Rx NCSI packets         */
+        __be32                  tx_pkts;       /* Tx NCSI packets         */
+        __be32                  tx_aen_pkts;   /* Tx AEN packets          */
+        __be32                  checksum;      /* Checksum                */
+};
+
+/* Get NCSI Pass-through Statistics */
+struct ncsi_rsp_gnpts_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;            /* Response header     */
+        __be32                  tx_pkts;        /* Tx packets          */
+        __be32                  tx_dropped;     /* Tx dropped packets  */
+        __be32                  tx_channel_err; /* Tx channel errors   */
+        __be32                  tx_us_err;      /* Tx undersize errors */
+        __be32                  rx_pkts;        /* Rx packets          */
+        __be32                  rx_dropped;     /* Rx dropped packets  */
+        __be32                  rx_channel_err; /* Rx channel errors   */
+        __be32                  rx_us_err;      /* Rx undersize errors */
+        __be32                  rx_os_err;      /* Rx oversize errors  */
+        __be32                  checksum;       /* Checksum            */
+};
+
+/* Get package status */
+struct ncsi_rsp_gps_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;      /* Response header             */
+        __be32                  status;   /* Hardware arbitration status */
+        __be32                  checksum;
+};
+
+/* Get package UUID */
+struct ncsi_rsp_gpuuid_pkt {
+        struct ncsi_rsp_pkt_hdr rsp;      /* Response header */
+        unsigned char           uuid[16]; /* UUID            */
+        __be32                  checksum;
+};
+
+/* AEN: Link State Change */
+struct ncsi_aen_lsc_pkt {
+        struct ncsi_aen_pkt_hdr aen;        /* AEN header      */
+        __be32                  status;     /* Link status     */
+        __be32                  oem_status; /* OEM link status */
+        __be32                  checksum;   /* Checksum        */
+        unsigned char           pad[14];
+};
+
+/* AEN: Configuration Required */
+struct ncsi_aen_cr_pkt {
+        struct ncsi_aen_pkt_hdr aen;      /* AEN header */
+        __be32                  checksum; /* Checksum   */
+        unsigned char           pad[22];
+};
+
+/* AEN: Host Network Controller Driver Status Change */
+struct ncsi_aen_hncdsc_pkt {
+        struct ncsi_aen_pkt_hdr aen;      /* AEN header */
+        __be32                  status;   /* Status     */
+        __be32                  checksum; /* Checksum   */
+        unsigned char           pad[18];
+};
+
+/* NCSI packet revision */
+#define NCSI_PKT_REVISION       0x01
+
+/* NCSI packet commands */
+#define NCSI_PKT_CMD_CIS        0x00 /* Clear Initial State              */
+#define NCSI_PKT_CMD_SP         0x01 /* Select Package                   */
+#define NCSI_PKT_CMD_DP         0x02 /* Deselect Package                 */
+#define NCSI_PKT_CMD_EC         0x03 /* Enable Channel                   */
+#define NCSI_PKT_CMD_DC         0x04 /* Disable Channel                  */
+#define NCSI_PKT_CMD_RC         0x05 /* Reset Channel                    */
+#define NCSI_PKT_CMD_ECNT       0x06 /* Enable Channel Network Tx        */
+#define NCSI_PKT_CMD_DCNT       0x07 /* Disable Channel Network Tx       */
+#define NCSI_PKT_CMD_AE         0x08 /* AEN Enable                       */
+#define NCSI_PKT_CMD_SL         0x09 /* Set Link                         */
+#define NCSI_PKT_CMD_GLS        0x0a /* Get Link                         */
+#define NCSI_PKT_CMD_SVF        0x0b /* Set VLAN Filter                  */
+#define NCSI_PKT_CMD_EV         0x0c /* Enable VLAN                      */
+#define NCSI_PKT_CMD_DV         0x0d /* Disable VLAN                     */
+#define NCSI_PKT_CMD_SMA        0x0e /* Set MAC address                  */
+#define NCSI_PKT_CMD_EBF        0x10 /* Enable Broadcast Filter          */
+#define NCSI_PKT_CMD_DBF        0x11 /* Disable Broadcast Filter         */
+#define NCSI_PKT_CMD_EGMF       0x12 /* Enable Global Multicast Filter   */
+#define NCSI_PKT_CMD_DGMF       0x13 /* Disable Global Multicast Filter  */
+#define NCSI_PKT_CMD_SNFC       0x14 /* Set NCSI Flow Control            */
+#define NCSI_PKT_CMD_GVI        0x15 /* Get Version ID                   */
+#define NCSI_PKT_CMD_GC         0x16 /* Get Capabilities                 */
+#define NCSI_PKT_CMD_GP         0x17 /* Get Parameters                   */
+#define NCSI_PKT_CMD_GCPS       0x18 /* Get Controller Packet Statistics */
+#define NCSI_PKT_CMD_GNS        0x19 /* Get NCSI Statistics              */
+#define NCSI_PKT_CMD_GNPTS      0x1a /* Get NCSI Pass-throu Statistics   */
+#define NCSI_PKT_CMD_GPS        0x1b /* Get package status               */
+#define NCSI_PKT_CMD_OEM        0x50 /* OEM                              */
+#define NCSI_PKT_CMD_PLDM       0x51 /* PLDM request over NCSI over RBT  */
+#define NCSI_PKT_CMD_GPUUID     0x52 /* Get package UUID                 */
+
+/* NCSI packet responses */
+#define NCSI_PKT_RSP_CIS        (NCSI_PKT_CMD_CIS    + 0x80)
+#define NCSI_PKT_RSP_SP         (NCSI_PKT_CMD_SP     + 0x80)
+#define NCSI_PKT_RSP_DP         (NCSI_PKT_CMD_DP     + 0x80)
+#define NCSI_PKT_RSP_EC         (NCSI_PKT_CMD_EC     + 0x80)
+#define NCSI_PKT_RSP_DC         (NCSI_PKT_CMD_DC     + 0x80)
+#define NCSI_PKT_RSP_RC         (NCSI_PKT_CMD_RC     + 0x80)
+#define NCSI_PKT_RSP_ECNT       (NCSI_PKT_CMD_ECNT   + 0x80)
+#define NCSI_PKT_RSP_DCNT       (NCSI_PKT_CMD_DCNT   + 0x80)
+#define NCSI_PKT_RSP_AE         (NCSI_PKT_CMD_AE     + 0x80)
+#define NCSI_PKT_RSP_SL         (NCSI_PKT_CMD_SL     + 0x80)
+#define NCSI_PKT_RSP_GLS        (NCSI_PKT_CMD_GLS    + 0x80)
+#define NCSI_PKT_RSP_SVF        (NCSI_PKT_CMD_SVF    + 0x80)
+#define NCSI_PKT_RSP_EV         (NCSI_PKT_CMD_EV     + 0x80)
+#define NCSI_PKT_RSP_DV         (NCSI_PKT_CMD_DV     + 0x80)
+#define NCSI_PKT_RSP_SMA        (NCSI_PKT_CMD_SMA    + 0x80)
+#define NCSI_PKT_RSP_EBF        (NCSI_PKT_CMD_EBF    + 0x80)
+#define NCSI_PKT_RSP_DBF        (NCSI_PKT_CMD_DBF    + 0x80)
+#define NCSI_PKT_RSP_EGMF       (NCSI_PKT_CMD_EGMF   + 0x80)
+#define NCSI_PKT_RSP_DGMF       (NCSI_PKT_CMD_DGMF   + 0x80)
+#define NCSI_PKT_RSP_SNFC       (NCSI_PKT_CMD_SNFC   + 0x80)
+#define NCSI_PKT_RSP_GVI        (NCSI_PKT_CMD_GVI    + 0x80)
+#define NCSI_PKT_RSP_GC         (NCSI_PKT_CMD_GC     + 0x80)
+#define NCSI_PKT_RSP_GP         (NCSI_PKT_CMD_GP     + 0x80)
+#define NCSI_PKT_RSP_GCPS       (NCSI_PKT_CMD_GCPS   + 0x80)
+#define NCSI_PKT_RSP_GNS        (NCSI_PKT_CMD_GNS    + 0x80)
+#define NCSI_PKT_RSP_GNPTS      (NCSI_PKT_CMD_GNPTS  + 0x80)
+#define NCSI_PKT_RSP_GPS        (NCSI_PKT_CMD_GPS    + 0x80)
+#define NCSI_PKT_RSP_OEM        (NCSI_PKT_CMD_OEM    + 0x80)
+#define NCSI_PKT_RSP_PLDM       (NCSI_PKT_CMD_PLDM   + 0x80)
+#define NCSI_PKT_RSP_GPUUID     (NCSI_PKT_CMD_GPUUID + 0x80)
+
+/* NCSI response code/reason */
+#define NCSI_PKT_RSP_C_COMPLETED        0x0000 /* Command Completed        */
+#define NCSI_PKT_RSP_C_FAILED           0x0001 /* Command Failed           */
+#define NCSI_PKT_RSP_C_UNAVAILABLE      0x0002 /* Command Unavailable      */
+#define NCSI_PKT_RSP_C_UNSUPPORTED      0x0003 /* Command Unsupported      */
+#define NCSI_PKT_RSP_R_NO_ERROR         0x0000 /* No Error                 */
+#define NCSI_PKT_RSP_R_INTERFACE        0x0001 /* Interface not ready      */
+#define NCSI_PKT_RSP_R_PARAM            0x0002 /* Invalid Parameter        */
+#define NCSI_PKT_RSP_R_CHANNEL          0x0003 /* Channel not Ready        */
+#define NCSI_PKT_RSP_R_PACKAGE          0x0004 /* Package not Ready        */
+#define NCSI_PKT_RSP_R_LENGTH           0x0005 /* Invalid payload length   */
+#define NCSI_PKT_RSP_R_UNKNOWN          0x7fff /* Command type unsupported */
+
+/* NCSI AEN packet type */
+#define NCSI_PKT_AEN            0xFF /* AEN Packet               */
+#define NCSI_PKT_AEN_LSC        0x00 /* Link status change       */
+#define NCSI_PKT_AEN_CR         0x01 /* Configuration required   */
+#define NCSI_PKT_AEN_HNCDSC     0x02 /* HNC driver status change */
+
+#endif /* NCSI_PKT_H */
diff --git a/slirp/ncsi.c b/slirp/ncsi.c
new file mode 100644
index 0000000000..d12ba3e494
--- /dev/null
+++ b/slirp/ncsi.c
@@ -0,0 +1,130 @@
+/*
+ * NC-SI (Network Controller Sideband Interface) "echo" model
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "slirp.h"
+
+#include "ncsi-pkt.h"
+
+/* Get Capabilities */
+static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh)
+{
+    struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *) rnh;
+
+    rsp->cap = htonl(~0);
+    rsp->bc_cap = htonl(~0);
+    rsp->mc_cap = htonl(~0);
+    rsp->buf_cap = htonl(~0);
+    rsp->aen_cap = htonl(~0);
+    rsp->vlan_mode = 0xff;
+    rsp->uc_cnt = 2;
+    return 0;
+}
+
+/* Get Link status */
+static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh)
+{
+    struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *) rnh;
+
+    rsp->status = htonl(0x1);
+    return 0;
+}
+
+static const struct ncsi_rsp_handler {
+        unsigned char   type;
+        int             payload;
+        int             (*handler)(struct ncsi_rsp_pkt_hdr *rnh);
+} ncsi_rsp_handlers[] = {
+        { NCSI_PKT_RSP_CIS,     4, NULL },
+        { NCSI_PKT_RSP_SP,      4, NULL },
+        { NCSI_PKT_RSP_DP,      4, NULL },
+        { NCSI_PKT_RSP_EC,      4, NULL },
+        { NCSI_PKT_RSP_DC,      4, NULL },
+        { NCSI_PKT_RSP_RC,      4, NULL },
+        { NCSI_PKT_RSP_ECNT,    4, NULL },
+        { NCSI_PKT_RSP_DCNT,    4, NULL },
+        { NCSI_PKT_RSP_AE,      4, NULL },
+        { NCSI_PKT_RSP_SL,      4, NULL },
+        { NCSI_PKT_RSP_GLS,    16, ncsi_rsp_handler_gls },
+        { NCSI_PKT_RSP_SVF,     4, NULL },
+        { NCSI_PKT_RSP_EV,      4, NULL },
+        { NCSI_PKT_RSP_DV,      4, NULL },
+        { NCSI_PKT_RSP_SMA,     4, NULL },
+        { NCSI_PKT_RSP_EBF,     4, NULL },
+        { NCSI_PKT_RSP_DBF,     4, NULL },
+        { NCSI_PKT_RSP_EGMF,    4, NULL },
+        { NCSI_PKT_RSP_DGMF,    4, NULL },
+        { NCSI_PKT_RSP_SNFC,    4, NULL },
+        { NCSI_PKT_RSP_GVI,    36, NULL },
+        { NCSI_PKT_RSP_GC,     32, ncsi_rsp_handler_gc },
+        { NCSI_PKT_RSP_GP,     -1, NULL },
+        { NCSI_PKT_RSP_GCPS,  172, NULL },
+        { NCSI_PKT_RSP_GNS,   172, NULL },
+        { NCSI_PKT_RSP_GNPTS, 172, NULL },
+        { NCSI_PKT_RSP_GPS,     8, NULL },
+        { NCSI_PKT_RSP_OEM,     0, NULL },
+        { NCSI_PKT_RSP_PLDM,    0, NULL },
+        { NCSI_PKT_RSP_GPUUID, 20, NULL }
+};
+
+/*
+ * packet format : ncsi header + payload + checksum
+ */
+#define NCSI_MAX_PAYLOAD 172
+#define NCSI_MAX_LEN     (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4)
+
+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
+{
+    struct ncsi_pkt_hdr *nh = (struct ncsi_pkt_hdr *)(pkt + ETH_HLEN);
+    uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN];
+    struct ethhdr *reh = (struct ethhdr *)ncsi_reply;
+    struct ncsi_rsp_pkt_hdr *rnh = (struct ncsi_rsp_pkt_hdr *)
+        (ncsi_reply + ETH_HLEN);
+    const struct ncsi_rsp_handler *handler = NULL;
+    int i;
+
+    memset(ncsi_reply, 0, sizeof(ncsi_reply));
+
+    memset(reh->h_dest, 0xff, ETH_ALEN);
+    memset(reh->h_source, 0xff, ETH_ALEN);
+    reh->h_proto = htons(ETH_P_NCSI);
+
+    for (i = 0; i < ARRAY_SIZE(ncsi_rsp_handlers); i++) {
+        if (ncsi_rsp_handlers[i].type == nh->type + 0x80) {
+            handler = &ncsi_rsp_handlers[i];
+            break;
+        }
+    }
+
+    rnh->common.mc_id      = nh->mc_id;
+    rnh->common.revision   = NCSI_PKT_REVISION;
+    rnh->common.id         = nh->id;
+    rnh->common.type       = nh->type + 0x80;
+    rnh->common.channel    = nh->channel;
+
+    if (handler) {
+        rnh->common.length = htons(handler->payload);
+        rnh->code          = htons(NCSI_PKT_RSP_C_COMPLETED);
+        rnh->reason        = htons(NCSI_PKT_RSP_R_NO_ERROR);
+
+        if (handler->handler) {
+            /* TODO: handle errors */
+            handler->handler(rnh);
+        }
+    } else {
+        rnh->common.length = 0;
+        rnh->code          = htons(NCSI_PKT_RSP_C_UNAVAILABLE);
+        rnh->reason        = htons(NCSI_PKT_RSP_R_UNKNOWN);
+    }
+
+    /* TODO: add a checksum at the end of the frame but the specs
+     * allows it to be zero */
+
+    slirp_output(slirp->opaque, ncsi_reply, ETH_HLEN + sizeof(*nh) +
+                 (handler ? handler->payload : 0) + 4);
+}
diff --git a/slirp/slirp.c b/slirp/slirp.c
index 60539de7a3..9a50918346 100644
--- a/slirp/slirp.c
+++ b/slirp/slirp.c
@@ -198,7 +198,7 @@ static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr,
 #ifdef DEBUG
             else {
                 char s[INET6_ADDRSTRLEN];
-                char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
+                const char *res = inet_ntop(af, tmp_addr, s, sizeof(s));
                 if (!res) {
                     res = "(string conversion error)";
                 }
@@ -870,6 +870,10 @@ void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len)
         }
         break;
 
+    case ETH_P_NCSI:
+        ncsi_input(slirp, pkt, pkt_len);
+        break;
+
     default:
         break;
     }
diff --git a/slirp/slirp.h b/slirp/slirp.h
index 3877f667f0..5af4f482b5 100644
--- a/slirp/slirp.h
+++ b/slirp/slirp.h
@@ -231,6 +231,9 @@ extern Slirp *slirp_instance;
 
 void if_start(Slirp *);
 
+/* ncsi.c */
+void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len);
+
 #ifndef _WIN32
 #include <netdb.h>
 #endif
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 04b062cb7e..b357aee778 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -304,33 +304,6 @@ bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 }
 
 #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-static void arm_v7m_unassigned_access(CPUState *cpu, hwaddr addr,
-                                      bool is_write, bool is_exec, int opaque,
-                                      unsigned size)
-{
-    ARMCPU *arm = ARM_CPU(cpu);
-    CPUARMState *env = &arm->env;
-
-    /* ARMv7-M interrupt return works by loading a magic value into the PC.
-     * On real hardware the load causes the return to occur.  The qemu
-     * implementation performs the jump normally, then does the exception
-     * return by throwing a special exception when when the CPU tries to
-     * execute code at the magic address.
-     */
-    if (env->v7m.exception != 0 && addr >= 0xfffffff0 && is_exec) {
-        cpu->exception_index = EXCP_EXCEPTION_EXIT;
-        cpu_loop_exit(cpu);
-    }
-
-    /* In real hardware an attempt to access parts of the address space
-     * with nothing there will usually cause an external abort.
-     * However our QEMU board models are often missing device models where
-     * the guest can boot anyway with the default read-as-zero/writes-ignored
-     * behaviour that you get without a QEMU unassigned_access hook.
-     * So just return here to retain that default behaviour.
-     */
-}
-
 static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
     CPUClass *cc = CPU_GET_CLASS(cs);
@@ -338,17 +311,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     CPUARMState *env = &cpu->env;
     bool ret = false;
 
-    /* ARMv7-M interrupt return works by loading a magic value
-     * into the PC.  On real hardware the load causes the
-     * return to occur.  The qemu implementation performs the
-     * jump normally, then does the exception return when the
-     * CPU tries to execute code at the magic address.
-     * This will cause the magic PC value to be pushed to
-     * the stack if an interrupt occurred at the wrong time.
-     * We avoid this by disabling interrupts when
-     * pc contains a magic address.
-     *
-     * ARMv7-M interrupt masking works differently than -A or -R.
+    /* ARMv7-M interrupt masking works differently than -A or -R.
      * There is no FIQ/IRQ distinction. Instead of I and F bits
      * masking FIQ and IRQ interrupts, an exception is taken only
      * if it is higher priority than the current execution priority
@@ -356,8 +319,7 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
      * currently active exception).
      */
     if (interrupt_request & CPU_INTERRUPT_HARD
-        && (armv7m_nvic_can_take_pending_exception(env->nvic))
-        && (env->regs[15] < 0xfffffff0)) {
+        && (armv7m_nvic_can_take_pending_exception(env->nvic))) {
         cs->exception_index = EXCP_IRQ;
         cc->do_interrupt(cs);
         ret = true;
@@ -1091,7 +1053,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
     cc->do_interrupt = arm_v7m_cpu_do_interrupt;
 #endif
 
-    cc->do_unassigned_access = arm_v7m_unassigned_access;
     cc->cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt;
 }
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index a8aabce7dd..1055bfef3d 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -58,6 +58,7 @@
 #define EXCP_SEMIHOST       16   /* semihosting call */
 #define EXCP_NOCP           17   /* v7M NOCP UsageFault */
 #define EXCP_INVSTATE       18   /* v7M INVSTATE UsageFault */
+/* NB: add new EXCP_ defines to the array in arm_log_exception() too */
 
 #define ARMV7M_EXCP_RESET   1
 #define ARMV7M_EXCP_NMI     2
@@ -2290,6 +2291,9 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
 #define ARM_TBFLAG_NS_MASK          (1 << ARM_TBFLAG_NS_SHIFT)
 #define ARM_TBFLAG_BE_DATA_SHIFT    20
 #define ARM_TBFLAG_BE_DATA_MASK     (1 << ARM_TBFLAG_BE_DATA_SHIFT)
+/* For M profile only, Handler (ie not Thread) mode */
+#define ARM_TBFLAG_HANDLER_SHIFT    21
+#define ARM_TBFLAG_HANDLER_MASK     (1 << ARM_TBFLAG_HANDLER_SHIFT)
 
 /* Bit usage when in AArch64 state */
 #define ARM_TBFLAG_TBI0_SHIFT 0        /* TBI0 for EL0/1 or TBI for EL2/3 */
@@ -2326,6 +2330,8 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env)
     (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT)
 #define ARM_TBFLAG_BE_DATA(F) \
     (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT)
+#define ARM_TBFLAG_HANDLER(F) \
+    (((F) & ARM_TBFLAG_HANDLER_MASK) >> ARM_TBFLAG_HANDLER_SHIFT)
 #define ARM_TBFLAG_TBI0(F) \
     (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT)
 #define ARM_TBFLAG_TBI1(F) \
@@ -2516,6 +2522,10 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
     }
     *flags |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT;
 
+    if (env->v7m.exception != 0) {
+        *flags |= ARM_TBFLAG_HANDLER_MASK;
+    }
+
     *cs_base = 0;
 }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 8cb7a9451c..8a3e4480aa 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6271,6 +6271,25 @@ static void arm_log_exception(int idx)
 {
     if (qemu_loglevel_mask(CPU_LOG_INT)) {
         const char *exc = NULL;
+        static const char * const excnames[] = {
+            [EXCP_UDEF] = "Undefined Instruction",
+            [EXCP_SWI] = "SVC",
+            [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
+            [EXCP_DATA_ABORT] = "Data Abort",
+            [EXCP_IRQ] = "IRQ",
+            [EXCP_FIQ] = "FIQ",
+            [EXCP_BKPT] = "Breakpoint",
+            [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
+            [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
+            [EXCP_HVC] = "Hypervisor Call",
+            [EXCP_HYP_TRAP] = "Hypervisor Trap",
+            [EXCP_SMC] = "Secure Monitor Call",
+            [EXCP_VIRQ] = "Virtual IRQ",
+            [EXCP_VFIQ] = "Virtual FIQ",
+            [EXCP_SEMIHOST] = "Semihosting call",
+            [EXCP_NOCP] = "v7M NOCP UsageFault",
+            [EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
+        };
 
         if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
             exc = excnames[idx];
diff --git a/target/arm/internals.h b/target/arm/internals.h
index f742a419ff..1f6efef7c4 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -51,27 +51,6 @@ static inline bool excp_is_internal(int excp)
         || excp == EXCP_SEMIHOST;
 }
 
-/* Exception names for debug logging; note that not all of these
- * precisely correspond to architectural exceptions.
- */
-static const char * const excnames[] = {
-    [EXCP_UDEF] = "Undefined Instruction",
-    [EXCP_SWI] = "SVC",
-    [EXCP_PREFETCH_ABORT] = "Prefetch Abort",
-    [EXCP_DATA_ABORT] = "Data Abort",
-    [EXCP_IRQ] = "IRQ",
-    [EXCP_FIQ] = "FIQ",
-    [EXCP_BKPT] = "Breakpoint",
-    [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit",
-    [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage",
-    [EXCP_HVC] = "Hypervisor Call",
-    [EXCP_HYP_TRAP] = "Hypervisor Trap",
-    [EXCP_SMC] = "Secure Monitor Call",
-    [EXCP_VIRQ] = "Virtual IRQ",
-    [EXCP_VFIQ] = "Virtual FIQ",
-    [EXCP_SEMIHOST] = "Semihosting call",
-};
-
 /* Scale factor for generic timers, ie number of ns per tick.
  * This gives a 62.5MHz timer.
  */
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 61111091ad..a16abc8d12 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -940,7 +940,7 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
              * single step at this point so something has gone wrong.
              */
             error_report("%s: guest single-step while debugging unsupported"
-                         " (%"PRIx64", %"PRIx32")\n",
+                         " (%"PRIx64", %"PRIx32")",
                          __func__, env->pc, debug_exit->hsr);
             return false;
         }
@@ -965,7 +965,7 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
         break;
     }
     default:
-        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")\n",
+        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                      __func__, debug_exit->hsr, env->pc);
     }
 
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index d64c8670fa..156b825040 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -130,7 +130,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
     if (unlikely(ret)) {
         ARMCPU *cpu = ARM_CPU(cs);
         CPUARMState *env = &cpu->env;
-        uint32_t syn, exc;
+        uint32_t syn, exc, fsc;
         unsigned int target_el;
         bool same_el;
 
@@ -145,19 +145,32 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
             env->cp15.hpfar_el2 = extract64(fi.s2addr, 12, 47) << 4;
         }
         same_el = arm_current_el(env) == target_el;
-        /* AArch64 syndrome does not have an LPAE bit */
-        syn = fsr & ~(1 << 9);
+
+        if (fsr & (1 << 9)) {
+            /* LPAE format fault status register : bottom 6 bits are
+             * status code in the same form as needed for syndrome
+             */
+            fsc = extract32(fsr, 0, 6);
+        } else {
+            /* Short format FSR : this fault will never actually be reported
+             * to an EL that uses a syndrome register. Check that here,
+             * and use a (currently) reserved FSR code in case the constructed
+             * syndrome does leak into the guest somehow.
+             */
+            assert(target_el != 2 && !arm_el_is_aa64(env, target_el));
+            fsc = 0x3f;
+        }
 
         /* For insn and data aborts we assume there is no instruction syndrome
          * information; this is always true for exceptions reported to EL1.
          */
         if (access_type == MMU_INST_FETCH) {
-            syn = syn_insn_abort(same_el, 0, fi.s1ptw, syn);
+            syn = syn_insn_abort(same_el, 0, fi.s1ptw, fsc);
             exc = EXCP_PREFETCH_ABORT;
         } else {
             syn = merge_syn_data_abort(env->exception.syndrome, target_el,
                                        same_el, fi.s1ptw,
-                                       access_type == MMU_DATA_STORE, syn);
+                                       access_type == MMU_DATA_STORE, fsc);
             if (access_type == MMU_DATA_STORE
                 && arm_feature(env, ARM_FEATURE_V6)) {
                 fsr |= (1 << 11);
diff --git a/target/arm/translate.c b/target/arm/translate.c
index e32e38cadd..0b5a0bca06 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -296,6 +296,30 @@ static void gen_step_complete_exception(DisasContext *s)
     s->is_jmp = DISAS_EXC;
 }
 
+static void gen_singlestep_exception(DisasContext *s)
+{
+    /* Generate the right kind of exception for singlestep, which is
+     * either the architectural singlestep or EXCP_DEBUG for QEMU's
+     * gdb singlestepping.
+     */
+    if (s->ss_active) {
+        gen_step_complete_exception(s);
+    } else {
+        gen_exception_internal(EXCP_DEBUG);
+    }
+}
+
+static inline bool is_singlestepping(DisasContext *s)
+{
+    /* Return true if we are singlestepping either because of
+     * architectural singlestep or QEMU gdbstub singlestep. This does
+     * not include the command line '-singlestep' mode which is rather
+     * misnamed as it only means "one instruction per TB" and doesn't
+     * affect the code we generate.
+     */
+    return s->singlestep_enabled || s->ss_active;
+}
+
 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 {
     TCGv_i32 tmp1 = tcg_temp_new_i32();
@@ -880,6 +904,21 @@ static const uint8_t table_logic_cc[16] = {
     1, /* mvn */
 };
 
+static inline void gen_set_condexec(DisasContext *s)
+{
+    if (s->condexec_mask) {
+        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        tcg_gen_movi_i32(tmp, val);
+        store_cpu_field(tmp, condexec_bits);
+    }
+}
+
+static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
+{
+    tcg_gen_movi_i32(cpu_R[15], val);
+}
+
 /* Set PC and Thumb state from an immediate address.  */
 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
 {
@@ -904,6 +943,51 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var)
     store_cpu_field(var, thumb);
 }
 
+/* Set PC and Thumb state from var. var is marked as dead.
+ * For M-profile CPUs, include logic to detect exception-return
+ * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
+ * and BX reg, and no others, and happens only for code in Handler mode.
+ */
+static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
+{
+    /* Generate the same code here as for a simple bx, but flag via
+     * s->is_jmp that we need to do the rest of the work later.
+     */
+    gen_bx(s, var);
+    if (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M)) {
+        s->is_jmp = DISAS_BX_EXCRET;
+    }
+}
+
+static inline void gen_bx_excret_final_code(DisasContext *s)
+{
+    /* Generate the code to finish possible exception return and end the TB */
+    TCGLabel *excret_label = gen_new_label();
+
+    /* Is the new PC value in the magic range indicating exception return? */
+    tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], 0xff000000, excret_label);
+    /* No: end the TB as we would for a DISAS_JMP */
+    if (is_singlestepping(s)) {
+        gen_singlestep_exception(s);
+    } else {
+        tcg_gen_exit_tb(0);
+    }
+    gen_set_label(excret_label);
+    /* Yes: this is an exception return.
+     * At this point in runtime env->regs[15] and env->thumb will hold
+     * the exception-return magic number, which do_v7m_exception_exit()
+     * will read. Nothing else will be able to see those values because
+     * the cpu-exec main loop guarantees that we will always go straight
+     * from raising the exception to the exception-handling code.
+     *
+     * gen_ss_advance(s) does nothing on M profile currently but
+     * calling it is conceptually the right thing as we have executed
+     * this instruction (compare SWI, HVC, SMC handling).
+     */
+    gen_ss_advance(s);
+    gen_exception_internal(EXCP_EXCEPTION_EXIT);
+}
+
 /* Variant of store_reg which uses branch&exchange logic when storing
    to r15 in ARM architecture v7 and above. The source must be a temporary
    and will be marked as dead. */
@@ -923,7 +1007,7 @@ static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 {
     if (reg == 15 && ENABLE_ARCH_5) {
-        gen_bx(s, var);
+        gen_bx_excret(s, var);
     } else {
         store_reg(s, reg, var);
     }
@@ -1056,11 +1140,6 @@ DO_GEN_ST(8, MO_UB)
 DO_GEN_ST(16, MO_UW)
 DO_GEN_ST(32, MO_UL)
 
-static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
-{
-    tcg_gen_movi_i32(cpu_R[15], val);
-}
-
 static inline void gen_hvc(DisasContext *s, int imm16)
 {
     /* The pre HVC helper handles cases when HVC gets trapped
@@ -1094,17 +1173,6 @@ static inline void gen_smc(DisasContext *s)
     s->is_jmp = DISAS_SMC;
 }
 
-static inline void
-gen_set_condexec (DisasContext *s)
-{
-    if (s->condexec_mask) {
-        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
-        TCGv_i32 tmp = tcg_temp_new_i32();
-        tcg_gen_movi_i32(tmp, val);
-        store_cpu_field(tmp, condexec_bits);
-    }
-}
-
 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 {
     gen_set_condexec(s);
@@ -4092,7 +4160,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
 
 static inline void gen_jmp (DisasContext *s, uint32_t dest)
 {
-    if (unlikely(s->singlestep_enabled || s->ss_active)) {
+    if (unlikely(is_singlestepping(s))) {
         /* An indirect jump so that we still trigger the debug exception.  */
         if (s->thumb)
             dest |= 1;
@@ -9858,7 +9926,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         tmp = tcg_temp_new_i32();
                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
                         if (i == 15) {
-                            gen_bx(s, tmp);
+                            gen_bx_excret(s, tmp);
                         } else if (i == rn) {
                             loaded_var = tmp;
                             loaded_base = 1;
@@ -9959,7 +10027,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
             if (logic_cc)
                 gen_logic_CC(tmp);
-            store_reg_bx(s, rd, tmp);
+            store_reg(s, rd, tmp);
             break;
         case 1: /* Sign/zero extend.  */
             op = (insn >> 20) & 7;
@@ -10485,7 +10553,12 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                         }
                         break;
                     case 4: /* bxj */
-                        /* Trivial implementation equivalent to bx.  */
+                        /* Trivial implementation equivalent to bx.
+                         * This instruction doesn't exist at all for M-profile.
+                         */
+                        if (arm_dc_feature(s, ARM_FEATURE_M)) {
+                            goto illegal_op;
+                        }
                         tmp = load_reg(s, rn);
                         gen_bx(s, tmp);
                         break;
@@ -10885,7 +10958,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
                 goto illegal_op;
             }
             if (rs == 15) {
-                gen_bx(s, tmp);
+                gen_bx_excret(s, tmp);
             } else {
                 store_reg(s, rs, tmp);
             }
@@ -11075,9 +11148,11 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
                     tmp2 = tcg_temp_new_i32();
                     tcg_gen_movi_i32(tmp2, val);
                     store_reg(s, 14, tmp2);
+                    gen_bx(s, tmp);
+                } else {
+                    /* Only BX works as exception-return, not BLX */
+                    gen_bx_excret(s, tmp);
                 }
-                /* already thumb, no need to check */
-                gen_bx(s, tmp);
                 break;
             }
             break;
@@ -11752,6 +11827,7 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
     dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
+    dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(tb->flags);
     dc->cp_regs = cpu->cp_regs;
     dc->features = env->features;
 
@@ -11851,14 +11927,6 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             dc->is_jmp = DISAS_EXC;
             break;
         }
-#else
-        if (arm_dc_feature(dc, ARM_FEATURE_M)) {
-            /* Branches to the magic exception-return addresses should
-             * already have been caught via the arm_v7m_unassigned_access hook,
-             * and never get here.
-             */
-            assert(dc->pc < 0xfffffff0);
-        }
 #endif
 
         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
@@ -11953,9 +12021,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
 
     } while (!dc->is_jmp && !tcg_op_buf_full() &&
-             !cs->singlestep_enabled &&
+             !is_singlestepping(dc) &&
              !singlestep &&
-             !dc->ss_active &&
              !end_of_page &&
              num_insns < max_insns);
 
@@ -11971,9 +12038,16 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
     /* At this stage dc->condjmp will only be set when the skipped
        instruction was a conditional branch or trap, and the PC has
        already been written.  */
-    if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
+    gen_set_condexec(dc);
+    if (dc->is_jmp == DISAS_BX_EXCRET) {
+        /* Exception return branches need some special case code at the
+         * end of the TB, which is complex enough that it has to
+         * handle the single-step vs not and the condition-failed
+         * insn codepath itself.
+         */
+        gen_bx_excret_final_code(dc);
+    } else if (unlikely(is_singlestepping(dc))) {
         /* Unconditional and "condition passed" instruction codepath. */
-        gen_set_condexec(dc);
         switch (dc->is_jmp) {
         case DISAS_SWI:
             gen_ss_advance(dc);
@@ -11993,24 +12067,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             gen_set_pc_im(dc, dc->pc);
             /* fall through */
         default:
-            if (dc->ss_active) {
-                gen_step_complete_exception(dc);
-            } else {
-                /* FIXME: Single stepping a WFI insn will not halt
-                   the CPU.  */
-                gen_exception_internal(EXCP_DEBUG);
-            }
-        }
-        if (dc->condjmp) {
-            /* "Condition failed" instruction codepath. */
-            gen_set_label(dc->condlabel);
-            gen_set_condexec(dc);
-            gen_set_pc_im(dc, dc->pc);
-            if (dc->ss_active) {
-                gen_step_complete_exception(dc);
-            } else {
-                gen_exception_internal(EXCP_DEBUG);
-            }
+            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
+            gen_singlestep_exception(dc);
         }
     } else {
         /* While branches must always occur at the end of an IT block,
@@ -12021,7 +12079,6 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             - Hardware watchpoints.
            Hardware breakpoints have already been handled and skip this code.
          */
-        gen_set_condexec(dc);
         switch(dc->is_jmp) {
         case DISAS_NEXT:
             gen_goto_tb(dc, 1, dc->pc);
@@ -12061,11 +12118,17 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
             break;
         }
-        if (dc->condjmp) {
-            gen_set_label(dc->condlabel);
-            gen_set_condexec(dc);
+    }
+
+    if (dc->condjmp) {
+        /* "Condition failed" instruction codepath for the branch/trap insn */
+        gen_set_label(dc->condlabel);
+        gen_set_condexec(dc);
+        if (unlikely(is_singlestepping(dc))) {
+            gen_set_pc_im(dc, dc->pc);
+            gen_singlestep_exception(dc);
+        } else {
             gen_goto_tb(dc, 1, dc->pc);
-            dc->condjmp = 0;
         }
     }
 
diff --git a/target/arm/translate.h b/target/arm/translate.h
index abb0760158..629dab945e 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -31,6 +31,7 @@ typedef struct DisasContext {
     bool vfp_enabled; /* FP enabled via FPSCR.EN */
     int vec_len;
     int vec_stride;
+    bool v7m_handler_mode;
     /* Immediate value in AArch32 SVC insn; must be set if is_jmp == DISAS_SWI
      * so that top level loop can generate correct syndrome information.
      */
@@ -134,6 +135,10 @@ static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
 #define DISAS_HVC 8
 #define DISAS_SMC 9
 #define DISAS_YIELD 10
+/* M profile branch which might be an exception return (and so needs
+ * custom end-of-TB code)
+ */
+#define DISAS_BX_EXCRET 11
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c
index 5a2e4be5d0..fe0aa36932 100644
--- a/target/i386/arch_dump.c
+++ b/target/i386/arch_dump.c
@@ -391,8 +391,7 @@ int cpu_get_dump_info(ArchDumpInfo *info,
 
 #ifdef TARGET_X86_64
     X86CPU *first_x86_cpu = X86_CPU(first_cpu);
-
-    lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK);
+    lma = first_cpu && (first_x86_cpu->env.hflags & HF_LMA_MASK);
 #endif
 
     if (lma) {
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7aa762245a..13c0985f11 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3373,15 +3373,19 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
     GList *l;
     Error *local_err = NULL;
 
-    /*TODO: cpu->max_features incorrectly overwrites features
-     * set using "feat=on|off". Once we fix this, we can convert
+    /*TODO: Now cpu->max_features doesn't overwrite features
+     * set using QOM properties, and we can convert
      * plus_features & minus_features to global properties
      * inside x86_cpu_parse_featurestr() too.
      */
     if (cpu->max_features) {
         for (w = 0; w < FEATURE_WORDS; w++) {
-            env->features[w] =
-                x86_cpu_get_supported_feature_word(w, cpu->migratable);
+            /* Override only features that weren't set explicitly
+             * by the user.
+             */
+            env->features[w] |=
+                x86_cpu_get_supported_feature_word(w, cpu->migratable) &
+                ~env->user_features[w];
         }
     }
 
@@ -3692,15 +3696,17 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp)
 }
 
 typedef struct BitProperty {
-    uint32_t *ptr;
+    FeatureWord w;
     uint32_t mask;
 } BitProperty;
 
 static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
 {
+    X86CPU *cpu = X86_CPU(obj);
     BitProperty *fp = opaque;
-    bool value = (*fp->ptr & fp->mask) == fp->mask;
+    uint32_t f = cpu->env.features[fp->w];
+    bool value = (f & fp->mask) == fp->mask;
     visit_type_bool(v, name, &value, errp);
 }
 
@@ -3708,6 +3714,7 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
 {
     DeviceState *dev = DEVICE(obj);
+    X86CPU *cpu = X86_CPU(obj);
     BitProperty *fp = opaque;
     Error *local_err = NULL;
     bool value;
@@ -3724,10 +3731,11 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name,
     }
 
     if (value) {
-        *fp->ptr |= fp->mask;
+        cpu->env.features[fp->w] |= fp->mask;
     } else {
-        *fp->ptr &= ~fp->mask;
+        cpu->env.features[fp->w] &= ~fp->mask;
     }
+    cpu->env.user_features[fp->w] |= fp->mask;
 }
 
 static void x86_cpu_release_bit_prop(Object *obj, const char *name,
@@ -3745,7 +3753,7 @@ static void x86_cpu_release_bit_prop(Object *obj, const char *name,
  */
 static void x86_cpu_register_bit_prop(X86CPU *cpu,
                                       const char *prop_name,
-                                      uint32_t *field,
+                                      FeatureWord w,
                                       int bitnr)
 {
     BitProperty *fp;
@@ -3755,11 +3763,11 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu,
     op = object_property_find(OBJECT(cpu), prop_name, NULL);
     if (op) {
         fp = op->opaque;
-        assert(fp->ptr == field);
+        assert(fp->w == w);
         fp->mask |= mask;
     } else {
         fp = g_new0(BitProperty, 1);
-        fp->ptr = field;
+        fp->w = w;
         fp->mask = mask;
         object_property_add(OBJECT(cpu), prop_name, "bool",
                             x86_cpu_get_bit_prop,
@@ -3787,7 +3795,7 @@ static void x86_cpu_register_feature_bit_props(X86CPU *cpu,
     /* aliases don't use "|" delimiters anymore, they are registered
      * manually using object_property_add_alias() */
     assert(!strchr(name, '|'));
-    x86_cpu_register_bit_prop(cpu, name, &cpu->env.features[w], bitnr);
+    x86_cpu_register_bit_prop(cpu, name, w, bitnr);
 }
 
 static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 07401ad9fe..c4602ca80d 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1147,6 +1147,8 @@ typedef struct CPUX86State {
     uint32_t cpuid_vendor3;
     uint32_t cpuid_version;
     FeatureWordArray features;
+    /* Features that were explicitly enabled/disabled */
+    FeatureWordArray user_features;
     uint32_t cpuid_model[12];
 
     /* MTRRs */
diff --git a/target/i386/helper.c b/target/i386/helper.c
index e2af3404f2..f11cac63a1 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -326,6 +326,10 @@ void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f,
 {
     X86CPU *cpu = X86_CPU(cs);
     APICCommonState *s = APIC_COMMON(cpu->apic_state);
+    if (!s) {
+        cpu_fprintf(f, "local apic state not available\n");
+        return;
+    }
     uint32_t *lvt = s->lvt;
 
     cpu_fprintf(f, "dumping local APIC state for CPU %-2u\n\n",
diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c
index ca2ea09f54..628f64aad5 100644
--- a/target/i386/misc_helper.c
+++ b/target/i386/misc_helper.c
@@ -18,6 +18,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
 #include "exec/exec-all.h"
@@ -156,7 +157,9 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
         break;
     case 8:
         if (!(env->hflags2 & HF2_VINTR_MASK)) {
+            qemu_mutex_lock_iothread();
             cpu_set_apic_tpr(x86_env_get_cpu(env)->apic_state, t0);
+            qemu_mutex_unlock_iothread();
         }
         env->v_tpr = t0 & 0x0f;
         break;
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 72c1b03a2a..1d1372fb43 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4418,6 +4418,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     s->vex_l = 0;
     s->vex_v = 0;
  next_byte:
+    /* x86 has an upper limit of 15 bytes for an instruction. Since we
+     * do not want to decode and generate IR for an illegal
+     * instruction, the following check limits the instruction size to
+     * 25 bytes: 14 prefix + 1 opc + 6 (modrm+sib+ofs) + 4 imm */
+    if (s->pc - pc_start > 14) {
+        goto illegal_op;
+    }
     b = cpu_ldub_code(env, s->pc);
     s->pc++;
     /* Collect prefixes.  */
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 9f1f132cef..64017acfad 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2245,14 +2245,8 @@ static void alter_insns(uint64_t *word, uint64_t flags, bool on)
     }
 }
 
-static void kvmppc_host_cpu_initfn(Object *obj)
-{
-    assert(kvm_enabled());
-}
-
 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
 {
-    DeviceClass *dc = DEVICE_CLASS(oc);
     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
     uint32_t vmx = kvmppc_get_vmx();
     uint32_t dfp = kvmppc_get_dfp();
@@ -2279,9 +2273,6 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
     if (icache_size != -1) {
         pcc->l1_icache_size = icache_size;
     }
-
-    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
-    dc->cannot_destroy_with_object_finalize_yet = true;
 }
 
 bool kvmppc_has_cap_epr(void)
@@ -2333,7 +2324,6 @@ static int kvm_ppc_register_host_cpu_type(void)
 {
     TypeInfo type_info = {
         .name = TYPE_HOST_POWERPC_CPU,
-        .instance_init = kvmppc_host_cpu_initfn,
         .class_init = kvmppc_host_cpu_class_init,
     };
     PowerPCCPUClass *pvr_pcc;
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 4ea3a2de80..ce461cc905 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -376,12 +376,12 @@ static void cpu_model_from_info(S390CPUModel *model, const CpuModelInfo *info,
 
 static void qdict_add_disabled_feat(const char *name, void *opaque)
 {
-    qdict_put((QDict *) opaque, name, qbool_from_bool(false));
+    qdict_put(opaque, name, qbool_from_bool(false));
 }
 
 static void qdict_add_enabled_feat(const char *name, void *opaque)
 {
-    qdict_put((QDict *) opaque, name, qbool_from_bool(true));
+    qdict_put(opaque, name, qbool_from_bool(true));
 }
 
 /* convert S390CPUDef into a static CpuModelInfo */
@@ -660,7 +660,6 @@ static void check_compatibility(const S390CPUModel *max_model,
 
 static S390CPUModel *get_max_cpu_model(Error **errp)
 {
-#ifndef CONFIG_USER_ONLY
     static S390CPUModel max_model;
     static bool cached;
 
@@ -680,7 +679,6 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
         cached = true;
         return &max_model;
     }
-#endif
     return NULL;
 }
 
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index ac47154b83..1a249d8359 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -47,16 +47,16 @@
 #include "exec/memattrs.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 
-/* #define DEBUG_KVM */
-
-#ifdef DEBUG_KVM
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
+#ifndef DEBUG_KVM
+#define DEBUG_KVM  0
 #endif
 
+#define DPRINTF(fmt, ...) do {                \
+    if (DEBUG_KVM) {                          \
+        fprintf(stderr, fmt, ## __VA_ARGS__); \
+    }                                         \
+} while (0);
+
 #define kvm_vm_check_mem_attr(s, attr) \
     kvm_vm_check_attr(s, KVM_S390_VM_MEM_CTRL, attr)
 
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 93b0e61366..eca82441d0 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -288,7 +288,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num)
     switch (num) {
     case 0x500:
         /* KVM hypercall */
+        qemu_mutex_lock_iothread();
         r = s390_virtio_hypercall(env);
+        qemu_mutex_unlock_iothread();
         break;
     case 0x44:
         /* yield */
@@ -515,7 +517,8 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
     /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered register"
        as parameter (input). Status (output) is always R1. */
 
-    switch (order_code) {
+    /* sigp contains the order code in bit positions 56-63, mask it here. */
+    switch (order_code & 0xff) {
     case SIGP_SET_ARCH:
         /* switch arch */
         break;
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 09a19c6f35..75ea247bc4 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -130,14 +130,7 @@ enum {
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
-#if QEMU_GNUC_PREREQ(4, 1)
     __builtin___clear_cache((char *) start, (char *) stop);
-#else
-    register uintptr_t _beg __asm("a1") = start;
-    register uintptr_t _end __asm("a2") = stop;
-    register uintptr_t _flg __asm("a3") = 0;
-    __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
-#endif
 }
 
 #endif
diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c
index d1f4c0dead..3785d77f62 100644
--- a/tcg/sparc/tcg-target.inc.c
+++ b/tcg/sparc/tcg-target.inc.c
@@ -843,6 +843,29 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 static tcg_insn_unit *qemu_ld_trampoline[16];
 static tcg_insn_unit *qemu_st_trampoline[16];
 
+static void emit_extend(TCGContext *s, TCGReg r, int op)
+{
+    /* Emit zero extend of 8, 16 or 32 bit data as
+     * required by the MO_* value op; do nothing for 64 bit.
+     */
+    switch (op & MO_SIZE) {
+    case MO_8:
+        tcg_out_arithi(s, r, r, 0xff, ARITH_AND);
+        break;
+    case MO_16:
+        tcg_out_arithi(s, r, r, 16, SHIFT_SLL);
+        tcg_out_arithi(s, r, r, 16, SHIFT_SRL);
+        break;
+    case MO_32:
+        if (SPARC64) {
+            tcg_out_arith(s, r, r, 0, SHIFT_SRL);
+        }
+        break;
+    case MO_64:
+        break;
+    }
+}
+
 static void build_trampolines(TCGContext *s)
 {
     static void * const qemu_ld_helpers[16] = {
@@ -910,6 +933,7 @@ static void build_trampolines(TCGContext *s)
         qemu_st_trampoline[i] = s->code_ptr;
 
         if (SPARC64) {
+            emit_extend(s, TCG_REG_O2, i);
             ra = TCG_REG_O4;
         } else {
             ra = TCG_REG_O1;
@@ -925,6 +949,7 @@ static void build_trampolines(TCGContext *s)
                 tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX);
                 ra += 2;
             } else {
+                emit_extend(s, ra, i);
                 ra += 1;
             }
             /* Skip the oi argument.  */
@@ -1119,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
         /* Skip the high-part; we'll perform the extract in the trampoline.  */
         param++;
     }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
+    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
 
     /* We use the helpers to extend SB and SW data, leaving the case
        of SL needing explicit extending below.  */
@@ -1199,7 +1224,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
         /* Skip the high-part; we'll perform the extract in the trampoline.  */
         param++;
     }
-    tcg_out_mov(s, TCG_TYPE_REG, param++, addr);
+    tcg_out_mov(s, TCG_TYPE_REG, param++, addrz);
     if (!SPARC64 && (memop & MO_SIZE) == MO_64) {
         /* Skip the high-part; we'll perform the extract in the trampoline.  */
         param++;
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 402e71cf06..579ec07cce 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -379,6 +379,7 @@ qapi-schema += doc-duplicated-since.json
 qapi-schema += doc-empty-arg.json
 qapi-schema += doc-empty-section.json
 qapi-schema += doc-empty-symbol.json
+qapi-schema += doc-good.json
 qapi-schema += doc-interleaved-section.json
 qapi-schema += doc-invalid-end.json
 qapi-schema += doc-invalid-end2.json
@@ -518,7 +519,7 @@ QEMU_CFLAGS += -I$(SRC_PATH)/tests
 
 
 # Deps that are common to various different sets of tests below
-test-util-obj-y = $(trace-obj-y) libqemuutil.a libqemustub.a
+test-util-obj-y = libqemuutil.a libqemustub.a
 test-qom-obj-y = $(qom-obj-y) $(test-util-obj-y)
 test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \
 	tests/test-qapi-event.o tests/test-qmp-introspect.o \
@@ -607,6 +608,9 @@ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-int
 		$(gen-out-type) -o tests -p "test-" $<, \
 		"GEN","$@")
 
+tests/qapi-schema/doc-good.test.texi: $(SRC_PATH)/tests/qapi-schema/doc-good.json $(SRC_PATH)/scripts/qapi2texi.py $(qapi-py)
+	$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi2texi.py $< > $@,"GEN","$@")
+
 tests/test-string-output-visitor$(EXESUF): tests/test-string-output-visitor.o $(test-qapi-obj-y)
 tests/test-string-input-visitor$(EXESUF): tests/test-string-input-visitor.o $(test-qapi-obj-y)
 tests/test-qmp-event$(EXESUF): tests/test-qmp-event.o $(test-qapi-obj-y)
@@ -736,7 +740,7 @@ tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o $(test-util-obj-y) \
 	$(chardev-obj-y)
 tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
-tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y)
+tests/test-keyval$(EXESUF): tests/test-keyval.o $(test-util-obj-y) $(test-qapi-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y)
 tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
 tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
@@ -856,9 +860,6 @@ QEMU_IOTESTS_HELPERS-$(CONFIG_LINUX) = tests/qemu-iotests/socket_scm_helper$(EXE
 check-tests/qemu-iotests-quick.sh: tests/qemu-iotests-quick.sh qemu-img$(EXESUF) qemu-io$(EXESUF) $(QEMU_IOTESTS_HELPERS-y)
 	$<
 
-.PHONY: check-tests/test-qapi.py
-check-tests/test-qapi.py: tests/test-qapi.py
-
 .PHONY: $(patsubst %, check-%, $(check-qapi-schema-y))
 $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json
 	$(call quiet-command, PYTHONPATH=$(SRC_PATH)/scripts \
@@ -871,10 +872,14 @@ $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json
 	@perl -p -e 's|\Q$(SRC_PATH)\E/||g' $*.test.err | diff -q $(SRC_PATH)/$*.err -
 	@diff -q $(SRC_PATH)/$*.exit $*.test.exit
 
+.PHONY: check-tests/qapi-schema/doc-good.texi
+check-tests/qapi-schema/doc-good.texi: tests/qapi-schema/doc-good.test.texi
+	@diff -q $(SRC_PATH)/tests/qapi-schema/doc-good.texi $<
+
 # Consolidated targets
 
 .PHONY: check-qapi-schema check-qtest check-unit check check-clean
-check-qapi-schema: $(patsubst %,check-%, $(check-qapi-schema-y))
+check-qapi-schema: $(patsubst %,check-%, $(check-qapi-schema-y)) check-tests/qapi-schema/doc-good.texi
 check-qtest: $(patsubst %,check-qtest-%, $(QTEST_TARGETS))
 check-unit: $(patsubst %,check-%, $(check-unit-y))
 check-block: $(patsubst %,check-%, $(check-block-y))
diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h
index 9f9a2d532c..348e4d7931 100644
--- a/tests/acpi-utils.h
+++ b/tests/acpi-utils.h
@@ -26,7 +26,7 @@ typedef struct {
     gsize asl_len;
     gchar *asl_file;
     bool tmp_files_retain;   /* do not delete the temp asl/aml */
-} QEMU_PACKED AcpiSdtTable;
+} AcpiSdtTable;
 
 #define ACPI_READ_FIELD(field, addr)           \
     do {                                       \
diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c
index 57edf6af33..11f48b049c 100644
--- a/tests/boot-serial-test.c
+++ b/tests/boot-serial-test.c
@@ -79,8 +79,8 @@ static void test_machine(const void *data)
     g_assert(fd != -1);
 
     args = g_strdup_printf("-M %s,accel=tcg -chardev file,id=serial0,path=%s"
-                           " -serial chardev:serial0 %s", test->machine,
-                           tmpname, test->extra);
+                           " -no-shutdown -serial chardev:serial0 %s",
+                           test->machine, tmpname, test->extra);
 
     qtest_start(args);
     unlink(tmpname);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index a5c3d2bf48..99b1195355 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -167,6 +167,14 @@ QTestState *qtest_init_without_qmp_handshake(const char *extra_args)
     socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
     qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
 
+    /* It's possible that if an earlier test run crashed it might
+     * have left a stale unix socket lying around. Delete any
+     * stale old socket to avoid spurious test failures with
+     * tests/libqtest.c:70:init_socket: assertion failed (ret != -1): (-1 != -1)
+     */
+    unlink(socket_path);
+    unlink(qmp_socket_path);
+
     sock = init_socket(socket_path);
     qmpsock = init_socket(qmp_socket_path);
 
diff --git a/tests/qapi-schema/doc-good.err b/tests/qapi-schema/doc-good.err
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/qapi-schema/doc-good.err
diff --git a/tests/qapi-schema/doc-good.exit b/tests/qapi-schema/doc-good.exit
new file mode 100644
index 0000000000..573541ac97
--- /dev/null
+++ b/tests/qapi-schema/doc-good.exit
@@ -0,0 +1 @@
+0
diff --git a/tests/qapi-schema/doc-good.json b/tests/qapi-schema/doc-good.json
new file mode 100644
index 0000000000..cfdc0a8a81
--- /dev/null
+++ b/tests/qapi-schema/doc-good.json
@@ -0,0 +1,136 @@
+# -*- Mode: Python -*-
+# Positive QAPI doc comment tests
+
+{ 'pragma': { 'doc-required': true } }
+
+##
+# = Section
+#
+# == Subsection
+#
+# *strong* _with emphasis_
+# @var {in braces}
+# * List item one
+# - Two, multiple
+#   lines
+#
+# 3. Three
+# Still in list
+#
+#   Not in list
+# - Second list
+# Note: still in list
+#
+# Note: not in list
+# 1. Third list
+#    is numbered
+#
+# - another item
+#
+# | example
+# | multiple lines
+#
+# Returns: the King
+# Since: the first age
+# Notes:
+#
+# 1. Lorem ipsum dolor sit amet
+#
+# 2. Ut enim ad minim veniam
+#
+# Duis aute irure dolor
+#
+# Example:
+#
+# -> in
+# <- out
+# Examples:
+# - *verbatim*
+# - {braces}
+##
+
+##
+# @Enum:
+# == Produces *invalid* texinfo
+# @one: The _one_ {and only}
+#
+# @two is undocumented
+##
+{ 'enum': 'Enum', 'data': [ 'one', 'two' ] }
+
+##
+# @Base:
+# @base1:
+#   the first member
+##
+{ 'struct': 'Base', 'data': { 'base1': 'Enum' } }
+
+##
+# @Variant1:
+# A paragraph
+#
+# Another paragraph (but no @var: line)
+##
+{ 'struct': 'Variant1', 'data': { 'var1': 'str' } }
+
+##
+# @Variant2:
+##
+{ 'struct': 'Variant2', 'data': {} }
+
+##
+# @Object:
+##
+{ 'union': 'Object',
+  'base': 'Base',
+  'discriminator': 'base1',
+  'data': { 'one': 'Variant1', 'two': 'Variant2' } }
+
+##
+# @SugaredUnion:
+##
+{ 'union': 'SugaredUnion',
+  'data': { 'one': 'Variant1', 'two': 'Variant2' } }
+
+##
+# == Another subsection
+##
+
+##
+# @cmd:
+# @arg1: the first argument
+#
+# @arg2: the second
+# argument
+# Note: @arg3 is undocumented
+# Returns: @Object
+# TODO: frobnicate
+# Notes:
+# - Lorem ipsum dolor sit amet
+# - Ut enim ad minim veniam
+#
+# Duis aute irure dolor
+# Example:
+#
+# -> in
+# <- out
+# Examples:
+# - *verbatim*
+# - {braces}
+# Since: 2.10
+##
+{ 'command': 'cmd',
+  'data': { 'arg1': 'int', '*arg2': 'str', 'arg3': 'bool' },
+  'returns': 'Object' }
+
+##
+# @cmd-boxed:
+# If you're bored enough to read this, go see a video of boxed cats
+# Example:
+#
+# -> in
+#
+# <- out
+##
+{ 'command': 'cmd-boxed', 'boxed': true,
+  'data': 'Object' }
diff --git a/tests/qapi-schema/doc-good.out b/tests/qapi-schema/doc-good.out
new file mode 100644
index 0000000000..70c1252408
--- /dev/null
+++ b/tests/qapi-schema/doc-good.out
@@ -0,0 +1,148 @@
+object Base
+    member base1: Enum optional=False
+enum Enum ['one', 'two']
+object Object
+    base Base
+    tag base1
+    case one: Variant1
+    case two: Variant2
+enum QType ['none', 'qnull', 'qint', 'qstring', 'qdict', 'qlist', 'qfloat', 'qbool']
+    prefix QTYPE
+object SugaredUnion
+    member type: SugaredUnionKind optional=False
+    tag type
+    case one: q_obj_Variant1-wrapper
+    case two: q_obj_Variant2-wrapper
+enum SugaredUnionKind ['one', 'two']
+object Variant1
+    member var1: str optional=False
+object Variant2
+command cmd q_obj_cmd-arg -> Object
+   gen=True success_response=True boxed=False
+command cmd-boxed Object -> None
+   gen=True success_response=True boxed=True
+object q_empty
+object q_obj_Variant1-wrapper
+    member data: Variant1 optional=False
+object q_obj_Variant2-wrapper
+    member data: Variant2 optional=False
+object q_obj_cmd-arg
+    member arg1: int optional=False
+    member arg2: str optional=True
+    member arg3: bool optional=False
+doc freeform
+    body=
+= Section
+
+== Subsection
+
+*strong* _with emphasis_
+@var {in braces}
+* List item one
+- Two, multiple
+lines
+
+3. Three
+Still in list
+
+Not in list
+- Second list
+Note: still in list
+
+Note: not in list
+1. Third list
+is numbered
+
+- another item
+
+| example
+| multiple lines
+
+Returns: the King
+Since: the first age
+Notes:
+
+1. Lorem ipsum dolor sit amet
+
+2. Ut enim ad minim veniam
+
+Duis aute irure dolor
+
+Example:
+
+-> in
+<- out
+Examples:
+- *verbatim*
+- {braces}
+doc symbol=Enum
+    body=
+== Produces *invalid* texinfo
+    arg=one
+The _one_ {and only}
+    arg=two
+
+    section=
+@two is undocumented
+doc symbol=Base
+    body=
+
+    arg=base1
+the first member
+doc symbol=Variant1
+    body=
+A paragraph
+
+Another paragraph (but no @var: line)
+    arg=var1
+
+doc symbol=Variant2
+    body=
+
+doc symbol=Object
+    body=
+
+doc symbol=SugaredUnion
+    body=
+
+    arg=type
+
+doc freeform
+    body=
+== Another subsection
+doc symbol=cmd
+    body=
+
+    arg=arg1
+the first argument
+    arg=arg2
+the second
+argument
+    arg=arg3
+
+    section=Note
+@arg3 is undocumented
+    section=Returns
+@Object
+    section=TODO
+frobnicate
+    section=Notes
+- Lorem ipsum dolor sit amet
+- Ut enim ad minim veniam
+
+Duis aute irure dolor
+    section=Example
+-> in
+<- out
+    section=Examples
+- *verbatim*
+- {braces}
+    section=Since
+2.10
+doc symbol=cmd-boxed
+    body=
+If you're bored enough to read this, go see a video of boxed cats
+    section=Example
+-> in
+
+<- out
diff --git a/tests/qapi-schema/doc-good.texi b/tests/qapi-schema/doc-good.texi
new file mode 100644
index 0000000000..c410626e4a
--- /dev/null
+++ b/tests/qapi-schema/doc-good.texi
@@ -0,0 +1,243 @@
+@section Section
+
+@subsection Subsection
+
+@strong{strong} @emph{with emphasis}
+@code{var} @{in braces@}
+@itemize @bullet
+@item
+List item one
+@item
+Two, multiple
+lines
+
+@item
+Three
+Still in list
+
+@end itemize
+
+Not in list
+@itemize @minus
+@item
+Second list
+Note: still in list
+
+@end itemize
+
+Note: not in list
+@enumerate
+@item
+Third list
+is numbered
+
+@item
+another item
+
+@example
+example
+@end example
+
+@example
+multiple lines
+@end example
+
+
+@end enumerate
+
+Returns: the King
+Since: the first age
+Notes:
+
+@enumerate
+@item
+Lorem ipsum dolor sit amet
+
+@item
+Ut enim ad minim veniam
+
+@end enumerate
+
+Duis aute irure dolor
+
+Example:
+
+-> in
+<- out
+Examples:
+@itemize @minus
+@item
+@strong{verbatim}
+@item
+@{braces@}
+@end itemize
+
+
+
+@deftp {Enum} Enum
+
+@subsection Produces @strong{invalid} texinfo
+
+@b{Values:}
+@table @asis
+@item @code{one}
+The @emph{one} @{and only@}
+@item @code{two}
+Not documented
+@end table
+@code{two} is undocumented
+
+@end deftp
+
+
+
+@deftp {Object} Base
+
+
+
+@b{Members:}
+@table @asis
+@item @code{base1: Enum}
+the first member
+@end table
+
+
+@end deftp
+
+
+
+@deftp {Object} Variant1
+
+A paragraph
+
+Another paragraph (but no @code{var}: line)
+
+@b{Members:}
+@table @asis
+@item @code{var1: string}
+Not documented
+@end table
+
+
+@end deftp
+
+
+
+@deftp {Object} Variant2
+
+
+
+
+@end deftp
+
+
+
+@deftp {Object} Object
+
+
+
+@b{Members:}
+@table @asis
+@item The members of @code{Base}
+@item The members of @code{Variant1} when @code{base1} is @t{"one"}
+@item The members of @code{Variant2} when @code{base1} is @t{"two"}
+@end table
+
+
+@end deftp
+
+
+
+@deftp {Object} SugaredUnion
+
+
+
+@b{Members:}
+@table @asis
+@item @code{type}
+One of @t{"one"}, @t{"two"}
+@item @code{data: Variant1} when @code{type} is @t{"one"}
+@item @code{data: Variant2} when @code{type} is @t{"two"}
+@end table
+
+
+@end deftp
+
+
+@subsection Another subsection
+
+
+@deftypefn Command {} cmd
+
+
+
+@b{Arguments:}
+@table @asis
+@item @code{arg1: int}
+the first argument
+@item @code{arg2: string} (optional)
+the second
+argument
+@item @code{arg3: boolean}
+Not documented
+@end table
+
+
+@b{Note:}
+@code{arg3} is undocumented
+
+@b{Returns:}
+@code{Object}
+
+@b{TODO:}
+frobnicate
+
+@b{Notes:}
+@itemize @minus
+@item
+Lorem ipsum dolor sit amet
+@item
+Ut enim ad minim veniam
+
+@end itemize
+
+Duis aute irure dolor
+
+@b{Example:}
+@example
+-> in
+<- out
+@end example
+
+
+@b{Examples:}
+@example
+- *verbatim*
+- @{braces@}
+@end example
+
+
+@b{Since:}
+2.10
+
+@end deftypefn
+
+
+
+@deftypefn Command {} cmd-boxed
+
+If you're bored enough to read this, go see a video of boxed cats
+
+@b{Arguments:} the members of @code{Object}
+
+@b{Example:}
+@example
+-> in
+
+<- out
+@end example
+
+
+@end deftypefn
+
+
diff --git a/tests/qapi-schema/test-qapi.py b/tests/qapi-schema/test-qapi.py
index ef74e2c4c8..c7724d3437 100644
--- a/tests/qapi-schema/test-qapi.py
+++ b/tests/qapi-schema/test-qapi.py
@@ -55,3 +55,14 @@ class QAPISchemaTestVisitor(QAPISchemaVisitor):
 
 schema = QAPISchema(sys.argv[1])
 schema.visit(QAPISchemaTestVisitor())
+
+for doc in schema.docs:
+    if doc.symbol:
+        print 'doc symbol=%s' % doc.symbol
+    else:
+        print 'doc freeform'
+    print '    body=\n%s' % doc.body
+    for arg, section in doc.args.iteritems():
+        print '    arg=%s\n%s' % (arg, section)
+    for section in doc.sections:
+        print '    section=%s\n%s' % (section.name, section)
diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017
index e3f9e75967..4f9302db42 100755
--- a/tests/qemu-iotests/017
+++ b/tests/qemu-iotests/017
@@ -41,6 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 # Any format supporting backing files
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 _unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat"
 
diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020
index 9c4a68c977..7a111100ec 100755
--- a/tests/qemu-iotests/020
+++ b/tests/qemu-iotests/020
@@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 # Any format supporting backing files
 _supported_fmt qcow qcow2 vmdk qed
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 _unsupported_imgopts "subformat=monolithicFlat" \
                      "subformat=twoGbMaxExtentFlat" \
diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
index 7783e57c71..97a8869251 100755
--- a/tests/qemu-iotests/028
+++ b/tests/qemu-iotests/028
@@ -32,6 +32,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     rm -f "${TEST_IMG}.copy"
     _cleanup_test_img
 }
diff --git a/tests/qemu-iotests/029 b/tests/qemu-iotests/029
index e639ac0ddf..30bab24dc0 100755
--- a/tests/qemu-iotests/029
+++ b/tests/qemu-iotests/029
@@ -42,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 # Any format supporting intenal snapshots
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 # Internal snapshots are (currently) impossible with refcount_bits=1
 _unsupported_imgopts 'refcount_bits=1[^0-9]'
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index bc6cf782fe..2f54986434 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -966,5 +966,51 @@ class TestRepairQuorum(iotests.QMPTestCase):
         #       to check that this file is really driven by quorum
         self.vm.shutdown()
 
+# Test mirroring with a source that does not have any parents (not even a
+# BlockBackend)
+class TestOrphanedSource(iotests.QMPTestCase):
+    def setUp(self):
+        blk0 = { 'node-name': 'src',
+                 'driver': 'null-co' }
+
+        blk1 = { 'node-name': 'dest',
+                 'driver': 'null-co' }
+
+        blk2 = { 'node-name': 'dest-ro',
+                 'driver': 'null-co',
+                 'read-only': 'on' }
+
+        self.vm = iotests.VM()
+        self.vm.add_blockdev(self.qmp_to_opts(blk0))
+        self.vm.add_blockdev(self.qmp_to_opts(blk1))
+        self.vm.add_blockdev(self.qmp_to_opts(blk2))
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+
+    def test_no_job_id(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', device='src', sync='full',
+                             target='dest')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_success(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', job_id='job', device='src',
+                             sync='full', target='dest')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait('job')
+
+    def test_failing_permissions(self):
+        self.assert_no_active_block_jobs()
+
+        result = self.vm.qmp('blockdev-mirror', device='src', sync='full',
+                             target='dest-ro')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index b67d0504a6..e30fd3b05b 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-............................................................................
+...............................................................................
 ----------------------------------------------------------------------
-Ran 76 tests
+Ran 79 tests
 
 OK
diff --git a/tests/qemu-iotests/073 b/tests/qemu-iotests/073
index ad37a617b2..40f85b18b9 100755
--- a/tests/qemu-iotests/073
+++ b/tests/qemu-iotests/073
@@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 CLUSTER_SIZE=64k
diff --git a/tests/qemu-iotests/094 b/tests/qemu-iotests/094
index 0ba0b0c361..9aa01e3627 100755
--- a/tests/qemu-iotests/094
+++ b/tests/qemu-iotests/094
@@ -27,7 +27,14 @@ echo "QA output created by $seq"
 here="$PWD"
 status=1	# failure is the default!
 
-trap "exit \$status" 0 1 2 3 15
+_cleanup()
+{
+    _cleanup_qemu
+    _cleanup_test_img
+    rm -f "$TEST_DIR/source.$IMGFMT"
+}
+
+trap "_cleanup; exit \$status" 0 1 2 3 15
 
 # get standard environment, filters and checks
 . ./common.rc
@@ -73,8 +80,6 @@ _send_qemu_cmd $QEMU_HANDLE \
 
 wait=1 _cleanup_qemu
 
-_cleanup_test_img
-rm -f "$TEST_DIR/source.$IMGFMT"
 
 # success, all done
 echo '*** done'
diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097
index 4c33e8038a..e22670c8d0 100755
--- a/tests/qemu-iotests/097
+++ b/tests/qemu-iotests/097
@@ -56,30 +56,25 @@ _supported_os Linux
 #  3: Two-layer backing chain, commit to lower backing file
 #     (in this case, the top image will implicitly stay unchanged)
 #
-# Each pass is run twice, since qcow2 has different code paths for cleaning
-# an image depending on whether it has a snapshot.
-#
 # 020 already tests committing, so this only tests whether image chains are
 # working properly and that all images above the base are emptied; therefore,
 # no complicated patterns are necessary.  Check near the 2G mark, as qcow2
 # has been buggy at that boundary in the past.
 for i in 0 1 2 3; do
-for j in 0 1; do
 
 echo
-echo "=== Test pass $i.$j ==="
+echo "=== Test pass $i ==="
 echo
 
-TEST_IMG="$TEST_IMG.base" _make_test_img 2100M
-TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 2100M
-_make_test_img -b "$TEST_IMG.itmd" 2100M
-if [ $j -eq 0 ]; then
-    $QEMU_IMG snapshot -c snap "$TEST_IMG"
-fi
+len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned
+TEST_IMG="$TEST_IMG.base" _make_test_img $len
+TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len
+_make_test_img -b "$TEST_IMG.itmd" $len
 
-$QEMU_IO -c 'write -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io
-$QEMU_IO -c 'write -P 2 0x7ffe0000 128k' "$TEST_IMG.itmd" | _filter_qemu_io
-$QEMU_IO -c 'write -P 3 0x7fff0000 64k' "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -P 1 0x7ffd0000 192k" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -P 2 0x7ffe0000 128k" "$TEST_IMG.itmd" | _filter_qemu_io
+$QEMU_IO -c "write -P 3 0x7fff0000 64k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -P 4 $(($len - 512)) 512" "$TEST_IMG" | _filter_qemu_io
 
 if [ $i -lt 3 ]; then
     if [ $i == 0 ]; then
@@ -97,11 +92,13 @@ if [ $i -lt 3 ]; then
 
     # Bottom should be unchanged
     $QEMU_IO -c 'read -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c "read -P 0 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io
 
     # Intermediate should contain changes from top
     $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
     $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
     $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+    $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.itmd" | _filter_qemu_io
 
     # And in pass 0, the top image should be empty, whereas in both other passes
     # it should be unchanged (which is both checked by qemu-img map)
@@ -112,6 +109,7 @@ else
     $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.base" | _filter_qemu_io
     $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.base" | _filter_qemu_io
     $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io
 
     # Both top and intermediate should be unchanged
 fi
@@ -121,7 +119,6 @@ $QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map
 $QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
 
 done
-done
 
 
 # success, all done
diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out
index 8106cc9275..f6705a1cc7 100644
--- a/tests/qemu-iotests/097.out
+++ b/tests/qemu-iotests/097.out
@@ -1,222 +1,131 @@
 QA output created by 097
 
-=== Test pass 0.0 ===
+=== Test pass 0 ===
 
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
 wrote 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 131072/131072 bytes at offset 2147352576
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image committed.
 read 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147287040
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147352576
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Offset          Length          File
 0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
 
-=== Test pass 0.1 ===
+=== Test pass 1 ===
 
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
-wrote 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 131072/131072 bytes at offset 2147352576
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Image committed.
-read 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147287040
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147352576
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
-
-=== Test pass 1.0 ===
-
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
-wrote 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 131072/131072 bytes at offset 2147352576
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Image committed.
-read 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147287040
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147352576
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
-0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
-
-=== Test pass 1.1 ===
-
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
 wrote 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 131072/131072 bytes at offset 2147352576
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image committed.
 read 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147287040
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147352576
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Offset          Length          File
 0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
 
-=== Test pass 2.0 ===
+=== Test pass 2 ===
 
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
 wrote 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 131072/131072 bytes at offset 2147352576
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image committed.
 read 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147287040
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147352576
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Offset          Length          File
 0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
 
-=== Test pass 2.1 ===
-
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
-wrote 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 131072/131072 bytes at offset 2147352576
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Image committed.
-read 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147287040
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147352576
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
-0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
-
-=== Test pass 3.0 ===
-
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
-wrote 196608/196608 bytes at offset 2147287040
-192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 131072/131072 bytes at offset 2147352576
-128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Image committed.
-read 65536/65536 bytes at offset 2147287040
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147352576
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-read 65536/65536 bytes at offset 2147418112
-64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-Offset          Length          File
-0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
-Offset          Length          File
-0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
-0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
-0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
-
-=== Test pass 3.1 ===
+=== Test pass 3 ===
 
-Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600
-Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base
-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
 wrote 196608/196608 bytes at offset 2147287040
 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 131072/131072 bytes at offset 2147352576
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 wrote 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Image committed.
 read 65536/65536 bytes at offset 2147287040
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -224,13 +133,18 @@ read 65536/65536 bytes at offset 2147352576
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 65536/65536 bytes at offset 2147418112
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 Offset          Length          File
 0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
 Offset          Length          File
 0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
 0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
 0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
 *** done
diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102
index 64b4af9441..87db1bb1bf 100755
--- a/tests/qemu-iotests/102
+++ b/tests/qemu-iotests/102
@@ -25,11 +25,12 @@ seq=$(basename $0)
 echo "QA output created by $seq"
 
 here=$PWD
-status=1	# failure is the default!
+status=1    # failure is the default!
 
 _cleanup()
 {
-	_cleanup_test_img
+    _cleanup_qemu
+    _cleanup_test_img
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109
index 927151a285..6161633a52 100755
--- a/tests/qemu-iotests/109
+++ b/tests/qemu-iotests/109
@@ -29,6 +29,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     rm -f $TEST_IMG.src
 	_cleanup_test_img
 }
diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out
index e5d70d75f1..55fe536d56 100644
--- a/tests/qemu-iotests/109.out
+++ b/tests/qemu-iotests/109.out
@@ -10,7 +10,7 @@ Automatically detecting the format is dangerous for raw images, write operations
 Specify the 'raw' format explicitly to remove the restrictions.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
 {"return": []}
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -73,7 +73,7 @@ Automatically detecting the format is dangerous for raw images, write operations
 Specify the 'raw' format explicitly to remove the restrictions.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 1024, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
 {"return": []}
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -115,7 +115,7 @@ Automatically detecting the format is dangerous for raw images, write operations
 Specify the 'raw' format explicitly to remove the restrictions.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": 0, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
 {"return": []}
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -135,7 +135,7 @@ Automatically detecting the format is dangerous for raw images, write operations
 Specify the 'raw' format explicitly to remove the restrictions.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2560, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
 {"return": []}
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
@@ -195,7 +195,7 @@ Automatically detecting the format is dangerous for raw images, write operations
 Specify the 'raw' format explicitly to remove the restrictions.
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_ERROR", "data": {"device": "src", "operation": "write", "action": "report"}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 2048, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "src", "len": 65536, "offset": OFFSET, "speed": 0, "type": "mirror", "error": "Operation not permitted"}}
 {"return": []}
 read 65536/65536 bytes at offset 0
 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
diff --git a/tests/qemu-iotests/114 b/tests/qemu-iotests/114
index f110d4f65a..5b7dc5496c 100755
--- a/tests/qemu-iotests/114
+++ b/tests/qemu-iotests/114
@@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 
diff --git a/tests/qemu-iotests/117 b/tests/qemu-iotests/117
index e955d52de3..6c83461182 100755
--- a/tests/qemu-iotests/117
+++ b/tests/qemu-iotests/117
@@ -29,6 +29,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
 	_cleanup_test_img
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
diff --git a/tests/qemu-iotests/130 b/tests/qemu-iotests/130
index ecc8a5ba1b..e7e43de6d6 100755
--- a/tests/qemu-iotests/130
+++ b/tests/qemu-iotests/130
@@ -31,6 +31,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     _cleanup_test_img
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -42,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 qemu_comm_method="monitor"
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
index af618b8817..acce946e75 100755
--- a/tests/qemu-iotests/134
+++ b/tests/qemu-iotests/134
@@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 
diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139
index 6d98e4f879..175d8f0008 100644
--- a/tests/qemu-iotests/139
+++ b/tests/qemu-iotests/139
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 #
-# Test cases for the QMP 'x-blockdev-del' command
+# Test cases for the QMP 'blockdev-del' command
 #
 # Copyright (C) 2015 Igalia, S.L.
 # Author: Alberto Garcia <berto@igalia.com>
@@ -79,7 +79,7 @@ class TestBlockdevDel(iotests.QMPTestCase):
     # Delete a BlockDriverState
     def delBlockDriverState(self, node, expect_error = False):
         self.checkBlockDriverState(node)
-        result = self.vm.qmp('x-blockdev-del', node_name = node)
+        result = self.vm.qmp('blockdev-del', node_name = node)
         if expect_error:
             self.assert_qmp(result, 'error/class', 'GenericError')
         else:
@@ -173,7 +173,7 @@ class TestBlockdevDel(iotests.QMPTestCase):
         self.wait_until_completed(id)
 
     # Add a BlkDebug node
-    # Note that the purpose of this is to test the x-blockdev-del
+    # Note that the purpose of this is to test the blockdev-del
     # sanity checks, not to create a usable blkdebug drive
     def addBlkDebug(self, debug, node):
         self.checkBlockDriverState(node, False)
@@ -191,7 +191,7 @@ class TestBlockdevDel(iotests.QMPTestCase):
         self.checkBlockDriverState(debug)
 
     # Add a BlkVerify node
-    # Note that the purpose of this is to test the x-blockdev-del
+    # Note that the purpose of this is to test the blockdev-del
     # sanity checks, not to create a usable blkverify drive
     def addBlkVerify(self, blkverify, test, raw):
         self.checkBlockDriverState(test, False)
diff --git a/tests/qemu-iotests/140 b/tests/qemu-iotests/140
index 49f9df4eb0..8c80a5a866 100755
--- a/tests/qemu-iotests/140
+++ b/tests/qemu-iotests/140
@@ -33,6 +33,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     _cleanup_test_img
     rm -f "$TEST_DIR/nbd"
 }
diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141
index 6d8f0a1a84..40a3405968 100755
--- a/tests/qemu-iotests/141
+++ b/tests/qemu-iotests/141
@@ -29,6 +29,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     _cleanup_test_img
     rm -f "$TEST_DIR/{b,m,o}.$IMGFMT"
 }
@@ -65,7 +66,7 @@ test_blockjob()
 
     # We want this to return an error because the block job is still running
     _send_qemu_cmd $QEMU_HANDLE \
-        "{'execute': 'x-blockdev-del',
+        "{'execute': 'blockdev-del',
           'arguments': {'node-name': 'drv0'}}" \
         'error' | _filter_generated_node_ids
 
@@ -75,7 +76,7 @@ test_blockjob()
         "$3"
 
     _send_qemu_cmd $QEMU_HANDLE \
-        "{'execute': 'x-blockdev-del',
+        "{'execute': 'blockdev-del',
           'arguments': {'node-name': 'drv0'}}" \
         'return'
 }
diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143
index ec4ef2221a..5ff1944507 100755
--- a/tests/qemu-iotests/143
+++ b/tests/qemu-iotests/143
@@ -29,6 +29,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     rm -f "$TEST_DIR/nbd"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147
index 45469c911e..32afea63d4 100755
--- a/tests/qemu-iotests/147
+++ b/tests/qemu-iotests/147
@@ -30,6 +30,13 @@ NBD_PORT = 10811
 test_img = os.path.join(iotests.test_dir, 'test.img')
 unix_socket = os.path.join(iotests.test_dir, 'nbd.socket')
 
+
+def flatten_sock_addr(crumpled_address):
+    result = { 'type': crumpled_address['type'] }
+    result.update(crumpled_address['data'])
+    return result
+
+
 class NBDBlockdevAddBase(iotests.QMPTestCase):
     def blockdev_add_options(self, address, export=None):
         options = { 'node-name': 'nbd-blockdev',
@@ -57,7 +64,7 @@ class NBDBlockdevAddBase(iotests.QMPTestCase):
                                                     filename)
                 break
 
-        result = self.vm.qmp('x-blockdev-del', node_name='nbd-blockdev')
+        result = self.vm.qmp('blockdev-del', node_name='nbd-blockdev')
         self.assert_qmp(result, 'return', {})
 
 
@@ -85,13 +92,15 @@ class QemuNBD(NBDBlockdevAddBase):
                         'host': 'localhost',
                         'port': str(NBD_PORT)
                     } }
-        self.client_test('nbd://localhost:%i' % NBD_PORT, address)
+        self.client_test('nbd://localhost:%i' % NBD_PORT,
+                         flatten_sock_addr(address))
 
     def test_unix(self):
         self._server_up('-k', unix_socket)
         address = { 'type': 'unix',
                     'data': { 'path': unix_socket } }
-        self.client_test('nbd+unix://?socket=' + unix_socket, address)
+        self.client_test('nbd+unix://?socket=' + unix_socket,
+                         flatten_sock_addr(address))
 
 
 class BuiltinNBD(NBDBlockdevAddBase):
@@ -134,7 +143,7 @@ class BuiltinNBD(NBDBlockdevAddBase):
                     } }
         self._server_up(address)
         self.client_test('nbd://localhost:%i/nbd-export' % NBD_PORT,
-                         address, 'nbd-export')
+                         flatten_sock_addr(address), 'nbd-export')
         self._server_down()
 
     def test_inet6(self):
@@ -149,10 +158,10 @@ class BuiltinNBD(NBDBlockdevAddBase):
                      'file': {
                          'driver': 'nbd',
                          'export': 'nbd-export',
-                         'server': address
+                         'server': flatten_sock_addr(address)
                      } }
         self._server_up(address)
-        self.client_test(filename, address, 'nbd-export')
+        self.client_test(filename, flatten_sock_addr(address), 'nbd-export')
         self._server_down()
 
     def test_unix(self):
@@ -160,7 +169,7 @@ class BuiltinNBD(NBDBlockdevAddBase):
                     'data': { 'path': unix_socket } }
         self._server_up(address)
         self.client_test('nbd+unix:///nbd-export?socket=' + unix_socket,
-                         address, 'nbd-export')
+                         flatten_sock_addr(address), 'nbd-export')
         self._server_down()
 
     def test_fd(self):
@@ -182,9 +191,9 @@ class BuiltinNBD(NBDBlockdevAddBase):
                      'file': {
                          'driver': 'nbd',
                          'export': 'nbd-export',
-                         'server': address
+                         'server': flatten_sock_addr(address)
                      } }
-        self.client_test(filename, address, 'nbd-export')
+        self.client_test(filename, flatten_sock_addr(address), 'nbd-export')
 
         self._server_down()
 
diff --git a/tests/qemu-iotests/156 b/tests/qemu-iotests/156
index cc95ff1f98..d799b73e1e 100755
--- a/tests/qemu-iotests/156
+++ b/tests/qemu-iotests/156
@@ -37,6 +37,7 @@ status=1	# failure is the default!
 
 _cleanup()
 {
+    _cleanup_qemu
     rm -f "$TEST_IMG{,.target}{,.backing,.overlay}"
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -48,6 +49,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2 qed
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 # Create source disk
diff --git a/tests/qemu-iotests/158 b/tests/qemu-iotests/158
index a6cdd6d8cf..ef8d70f109 100755
--- a/tests/qemu-iotests/158
+++ b/tests/qemu-iotests/158
@@ -39,6 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
 
 _supported_fmt qcow2
 _supported_proto generic
+_unsupported_proto vxhs
 _supported_os Linux
 
 
diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176
new file mode 100755
index 0000000000..950b28720e
--- /dev/null
+++ b/tests/qemu-iotests/176
@@ -0,0 +1,131 @@
+#!/bin/bash
+#
+# Commit changes into backing chains and empty the top image if the
+# backing image is not explicitly specified.
+#
+# Variant of 097, which includes snapshots to test different codepath
+# in qcow2
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=mreitz@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    _rm_test_img "$TEST_IMG.itmd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+# Any format supporting backing files and bdrv_make_empty
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+
+# Four passes:
+#  0: Two-layer backing chain, commit to upper backing file (implicitly)
+#     (in this case, the top image will be emptied)
+#  1: Two-layer backing chain, commit to upper backing file (explicitly)
+#     (in this case, the top image will implicitly stay unchanged)
+#  2: Two-layer backing chain, commit to upper backing file (implicitly with -d)
+#     (in this case, the top image will explicitly stay unchanged)
+#  3: Two-layer backing chain, commit to lower backing file
+#     (in this case, the top image will implicitly stay unchanged)
+#
+# 020 already tests committing, so this only tests whether image chains are
+# working properly and that all images above the base are emptied; therefore,
+# no complicated patterns are necessary.  Check near the 2G mark, as qcow2
+# has been buggy at that boundary in the past.
+for i in 0 1 2 3; do
+
+echo
+echo "=== Test pass $i ==="
+echo
+
+len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned
+TEST_IMG="$TEST_IMG.base" _make_test_img $len
+TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len
+_make_test_img -b "$TEST_IMG.itmd" $len
+$QEMU_IMG snapshot -c snap "$TEST_IMG"
+
+$QEMU_IO -c "write -P 1 0x7ffd0000 192k" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -P 2 0x7ffe0000 128k" "$TEST_IMG.itmd" | _filter_qemu_io
+$QEMU_IO -c "write -P 3 0x7fff0000 64k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -P 4 $(($len - 512)) 512" "$TEST_IMG" | _filter_qemu_io
+
+if [ $i -lt 3 ]; then
+    if [ $i == 0 ]; then
+        # -b "$TEST_IMG.itmd" should be the default (that is, committing to the
+        # first backing file in the chain)
+        $QEMU_IMG commit "$TEST_IMG"
+    elif [ $i == 1 ]; then
+        # explicitly specify the commit target (this should imply -d)
+        $QEMU_IMG commit -b "$TEST_IMG.itmd" "$TEST_IMG"
+    else
+        # do not explicitly specify the commit target, but use -d to leave the
+        # top image unchanged
+        $QEMU_IMG commit -d "$TEST_IMG"
+    fi
+
+    # Bottom should be unchanged
+    $QEMU_IO -c 'read -P 1 0x7ffd0000 192k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c "read -P 0 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io
+
+    # Intermediate should contain changes from top
+    $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+    $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+    $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.itmd" | _filter_qemu_io
+    $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.itmd" | _filter_qemu_io
+
+    # And in pass 0, the top image should be empty, whereas in both other passes
+    # it should be unchanged (which is both checked by qemu-img map)
+else
+    $QEMU_IMG commit -b "$TEST_IMG.base" "$TEST_IMG"
+
+    # Bottom should contain all changes
+    $QEMU_IO -c 'read -P 1 0x7ffd0000 64k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c 'read -P 2 0x7ffe0000 64k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c 'read -P 3 0x7fff0000 64k' "$TEST_IMG.base" | _filter_qemu_io
+    $QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io
+
+    # Both top and intermediate should be unchanged
+fi
+
+$QEMU_IMG map "$TEST_IMG.base" | _filter_qemu_img_map
+$QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
+
+done
+
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out
new file mode 100644
index 0000000000..6271fa7d6f
--- /dev/null
+++ b/tests/qemu-iotests/176.out
@@ -0,0 +1,150 @@
+QA output created by 176
+
+=== Test pass 0 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+
+=== Test pass 1 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+
+=== Test pass 2 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.itmd
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+
+=== Test pass 3 ===
+
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112
+Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd
+wrote 196608/196608 bytes at offset 2147287040
+192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 2147352576
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Image committed.
+read 65536/65536 bytes at offset 2147287040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147352576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 2147418112
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 512/512 bytes at offset 2202009600
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0x7ffd0000      0x30000         TEST_DIR/t.IMGFMT.base
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x20000         TEST_DIR/t.IMGFMT.itmd
+0x83400000      0x200           TEST_DIR/t.IMGFMT.base
+Offset          Length          File
+0x7ffd0000      0x10000         TEST_DIR/t.IMGFMT.base
+0x7ffe0000      0x10000         TEST_DIR/t.IMGFMT.itmd
+0x7fff0000      0x10000         TEST_DIR/t.IMGFMT
+0x83400000      0x200           TEST_DIR/t.IMGFMT
+*** done
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 4d5650d7c8..9c6f9721e5 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -157,6 +157,7 @@ check options
     -ssh                test ssh
     -nfs                test nfs
     -luks               test luks
+    -vxhs               test vxhs
     -xdiff              graphical mode diff
     -nocache            use O_DIRECT on backing file
     -misalign           misalign memory allocations
@@ -260,6 +261,11 @@ testlist options
             xpand=false
             ;;
 
+        -vxhs)
+            IMGPROTO=vxhs
+            xpand=false
+            ;;
+
         -ssh)
             IMGPROTO=ssh
             xpand=false
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 55527aac87..c4b51b3509 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -105,6 +105,10 @@ if [ -z "$QEMU_NBD_PROG" ]; then
     export QEMU_NBD_PROG="`set_prog_path qemu-nbd`"
 fi
 
+if [ -z "$QEMU_VXHS_PROG" ]; then
+    export QEMU_VXHS_PROG="`set_prog_path qnio_server`"
+fi
+
 _qemu_wrapper()
 {
     (
@@ -156,10 +160,19 @@ _qemu_nbd_wrapper()
     )
 }
 
+_qemu_vxhs_wrapper()
+{
+    (
+        echo $BASHPID > "${TEST_DIR}/qemu-vxhs.pid"
+        exec "$QEMU_VXHS_PROG" $QEMU_VXHS_OPTIONS "$@"
+    )
+}
+
 export QEMU=_qemu_wrapper
 export QEMU_IMG=_qemu_img_wrapper
 export QEMU_IO=_qemu_io_wrapper
 export QEMU_NBD=_qemu_nbd_wrapper
+export QEMU_VXHS=_qemu_vxhs_wrapper
 
 QEMU_IMG_EXTRA_ARGS=
 if [ "$IMGOPTSSYNTAX" = "true" ]; then
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 104001358b..c9a2d5c595 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -122,6 +122,7 @@ _filter_img_info()
         -e "s#$TEST_DIR#TEST_DIR#g" \
         -e "s#$IMGFMT#IMGFMT#g" \
         -e 's#nbd://127.0.0.1:10810$#TEST_DIR/t.IMGFMT#g' \
+        -e 's#json.*vdisk-id.*vxhs"}}#TEST_DIR/t.IMGFMT#' \
         -e "/encrypted: yes/d" \
         -e "/cluster_size: [0-9]\\+/d" \
         -e "/table_size: [0-9]\\+/d" \
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 7d4781d4ad..62529eed6e 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -85,6 +85,9 @@ else
     elif [ "$IMGPROTO" = "nfs" ]; then
         TEST_DIR="nfs://127.0.0.1/$TEST_DIR"
         TEST_IMG=$TEST_DIR/t.$IMGFMT
+    elif [ "$IMGPROTO" = "vxhs" ]; then
+        TEST_IMG_FILE=$TEST_DIR/t.$IMGFMT
+        TEST_IMG="vxhs://127.0.0.1:9999/t.$IMGFMT"
     else
         TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT
     fi
@@ -171,6 +174,12 @@ _make_test_img()
         eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT  $TEST_IMG_FILE >/dev/null &"
         sleep 1 # FIXME: qemu-nbd needs to be listening before we continue
     fi
+
+    # Start QNIO server on image directory for vxhs protocol
+    if [ $IMGPROTO = "vxhs" ]; then
+        eval "$QEMU_VXHS -d  $TEST_DIR > /dev/null &"
+        sleep 1 # Wait for server to come up.
+    fi
 }
 
 _rm_test_img()
@@ -197,6 +206,16 @@ _cleanup_test_img()
             fi
             rm -f "$TEST_IMG_FILE"
             ;;
+        vxhs)
+            if [ -f "${TEST_DIR}/qemu-vxhs.pid" ]; then
+                local QEMU_VXHS_PID
+                read QEMU_VXHS_PID < "${TEST_DIR}/qemu-vxhs.pid"
+                kill ${QEMU_VXHS_PID} >/dev/null 2>&1
+                rm -f "${TEST_DIR}/qemu-vxhs.pid"
+            fi
+            rm -f "$TEST_IMG_FILE"
+            ;;
+
         file)
             _rm_test_img "$TEST_DIR/t.$IMGFMT"
             _rm_test_img "$TEST_DIR/t.$IMGFMT.orig"
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 1f4bf03185..43142ddfcf 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -168,3 +168,4 @@
 173 rw auto
 174 auto
 175 auto quick
+176 rw auto backing
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index bec8eb4b8d..abcf3c10e2 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -177,6 +177,14 @@ class VM(qtest.QEMUQtestMachine):
         self._num_drives += 1
         return self
 
+    def add_blockdev(self, opts):
+        self._args.append('-blockdev')
+        if isinstance(opts, str):
+            self._args.append(opts)
+        else:
+            self._args.append(','.join(opts))
+        return self
+
     def pause_drive(self, drive, event=None):
         '''Pause drive r/w operations'''
         if not event:
@@ -235,6 +243,13 @@ class QMPTestCase(unittest.TestCase):
             output[basestr[:-1]] = obj # Strip trailing '.'
         return output
 
+    def qmp_to_opts(self, obj):
+        obj = self.flatten_qmp_object(obj)
+        output_list = list()
+        for key in obj:
+            output_list += [key + '=' + obj[key]]
+        return ','.join(output_list)
+
     def assert_qmp_absent(self, d, path):
         try:
             result = self.dictpath(d, path)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
index 4ccbda14af..0f80194e85 100644
--- a/tests/test-blockjob-txn.c
+++ b/tests/test-blockjob-txn.c
@@ -110,7 +110,6 @@ static BlockJob *test_block_job_start(unsigned int iterations,
     s->result = result;
     data->job = s;
     data->result = result;
-    block_job_start(&s->common);
     return &s->common;
 }
 
@@ -123,6 +122,7 @@ static void test_single_job(int expected)
     txn = block_job_txn_new();
     job = test_block_job_start(1, true, expected, &result);
     block_job_txn_add_job(txn, job);
+    block_job_start(job);
 
     if (expected == -ECANCELED) {
         block_job_cancel(job);
@@ -164,6 +164,8 @@ static void test_pair_jobs(int expected1, int expected2)
     block_job_txn_add_job(txn, job1);
     job2 = test_block_job_start(2, true, expected2, &result2);
     block_job_txn_add_job(txn, job2);
+    block_job_start(job1);
+    block_job_start(job2);
 
     if (expected1 == -ECANCELED) {
         block_job_cancel(job1);
@@ -223,6 +225,8 @@ static void test_pair_jobs_fail_cancel_race(void)
     block_job_txn_add_job(txn, job1);
     job2 = test_block_job_start(2, false, 0, &result2);
     block_job_txn_add_job(txn, job2);
+    block_job_start(job1);
+    block_job_start(job2);
 
     block_job_cancel(job1);
 
diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c
index 1957a86743..85e6603d59 100644
--- a/tests/test-crypto-block.c
+++ b/tests/test-crypto-block.c
@@ -187,11 +187,11 @@ static struct QCryptoBlockTestData {
 
 
 static ssize_t test_block_read_func(QCryptoBlock *block,
+                                    void *opaque,
                                     size_t offset,
                                     uint8_t *buf,
                                     size_t buflen,
-                                    Error **errp,
-                                    void *opaque)
+                                    Error **errp)
 {
     Buffer *header = opaque;
 
@@ -204,9 +204,9 @@ static ssize_t test_block_read_func(QCryptoBlock *block,
 
 
 static ssize_t test_block_init_func(QCryptoBlock *block,
+                                    void *opaque,
                                     size_t headerlen,
-                                    Error **errp,
-                                    void *opaque)
+                                    Error **errp)
 {
     Buffer *header = opaque;
 
@@ -219,11 +219,11 @@ static ssize_t test_block_init_func(QCryptoBlock *block,
 
 
 static ssize_t test_block_write_func(QCryptoBlock *block,
+                                     void *opaque,
                                      size_t offset,
                                      const uint8_t *buf,
                                      size_t buflen,
-                                     Error **errp,
-                                     void *opaque)
+                                     Error **errp)
 {
     Buffer *header = opaque;
 
diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c
index aaa9116fb7..c5c131479c 100644
--- a/tests/test-io-channel-socket.c
+++ b/tests/test-io-channel-socket.c
@@ -234,6 +234,8 @@ static void test_io_channel(bool async,
                  qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS));
         g_assert(!passFD ||
                  qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS));
+        g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN));
+        g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN));
 
         test = qio_channel_test_new();
         qio_channel_test_run_threads(test, true, src, dst);
@@ -248,6 +250,8 @@ static void test_io_channel(bool async,
                  qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS));
         g_assert(!passFD ||
                  qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS));
+        g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN));
+        g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN));
 
         test = qio_channel_test_new();
         qio_channel_test_run_threads(test, false, src, dst);
@@ -262,6 +266,8 @@ static void test_io_channel(bool async,
                  qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS));
         g_assert(!passFD ||
                  qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS));
+        g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN));
+        g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN));
 
         test = qio_channel_test_new();
         qio_channel_test_run_threads(test, true, src, dst);
@@ -276,6 +282,8 @@ static void test_io_channel(bool async,
                  qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_FD_PASS));
         g_assert(!passFD ||
                  qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_FD_PASS));
+        g_assert(qio_channel_has_feature(src, QIO_CHANNEL_FEATURE_SHUTDOWN));
+        g_assert(qio_channel_has_feature(dst, QIO_CHANNEL_FEATURE_SHUTDOWN));
 
         test = qio_channel_test_new();
         qio_channel_test_run_threads(test, false, src, dst);
diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index 71288b082c..ba19560a22 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -14,6 +14,7 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qstring.h"
 #include "qapi/qobject-input-visitor.h"
+#include "test-qapi-visit.h"
 #include "qemu/cutils.h"
 #include "qemu/option.h"
 
@@ -218,14 +219,14 @@ static void test_keyval_parse_list(void)
     QDECREF(qdict);
 
     /* Multiple indexes, last one wins */
-    qdict = keyval_parse("list.1=goner,list.0=null,list.1=eins,list.2=zwei",
+    qdict = keyval_parse("list.1=goner,list.0=null,list.01=eins,list.2=zwei",
                          NULL, &error_abort);
     g_assert_cmpint(qdict_size(qdict), ==, 1);
     check_list012(qdict_get_qlist(qdict, "list"));
     QDECREF(qdict);
 
     /* List at deeper nesting */
-    qdict = keyval_parse("a.list.1=eins,a.list.0=null,a.list.2=zwei",
+    qdict = keyval_parse("a.list.1=eins,a.list.00=null,a.list.2=zwei",
                          NULL, &error_abort);
     g_assert_cmpint(qdict_size(qdict), ==, 1);
     sub_qdict = qdict_get_qdict(qdict, "a");
@@ -242,7 +243,7 @@ static void test_keyval_parse_list(void)
     g_assert(!qdict);
 
     /* Missing list indexes */
-    qdict = keyval_parse("list.2=lonely", NULL, &err);
+    qdict = keyval_parse("list.1=lonely", NULL, &err);
     error_free_or_abort(&err);
     g_assert(!qdict);
     qdict = keyval_parse("list.0=null,list.2=eins,list.02=zwei", NULL, &err);
@@ -608,6 +609,56 @@ static void test_keyval_visit_optional(void)
     visit_free(v);
 }
 
+static void test_keyval_visit_alternate(void)
+{
+    Error *err = NULL;
+    Visitor *v;
+    QDict *qdict;
+    AltNumStr *ans;
+    AltNumInt *ani;
+
+    /*
+     * Can't do scalar alternate variants other than string.  You get
+     * the string variant if there is one, else an error.
+     */
+    qdict = keyval_parse("a=1,b=2", NULL, &error_abort);
+    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    QDECREF(qdict);
+    visit_start_struct(v, NULL, NULL, 0, &error_abort);
+    visit_type_AltNumStr(v, "a", &ans, &error_abort);
+    g_assert_cmpint(ans->type, ==, QTYPE_QSTRING);
+    g_assert_cmpstr(ans->u.s, ==, "1");
+    visit_type_AltNumInt(v, "a", &ani, &err);
+    error_free_or_abort(&err);
+    visit_end_struct(v, NULL);
+    visit_free(v);
+}
+
+static void test_keyval_visit_any(void)
+{
+    Visitor *v;
+    QDict *qdict;
+    QObject *any;
+    QList *qlist;
+    QString *qstr;
+
+    qdict = keyval_parse("a.0=null,a.1=1", NULL, &error_abort);
+    v = qobject_input_visitor_new_keyval(QOBJECT(qdict));
+    QDECREF(qdict);
+    visit_start_struct(v, NULL, NULL, 0, &error_abort);
+    visit_type_any(v, "a", &any, &error_abort);
+    qlist = qobject_to_qlist(any);
+    g_assert(qlist);
+    qstr = qobject_to_qstring(qlist_pop(qlist));
+    g_assert_cmpstr(qstring_get_str(qstr), ==, "null");
+    qstr = qobject_to_qstring(qlist_pop(qlist));
+    g_assert_cmpstr(qstring_get_str(qstr), ==, "1");
+    g_assert(qlist_empty(qlist));
+    visit_check_struct(v, &error_abort);
+    visit_end_struct(v, NULL);
+    visit_free(v);
+}
+
 int main(int argc, char *argv[])
 {
     g_test_init(&argc, &argv, NULL);
@@ -619,6 +670,8 @@ int main(int argc, char *argv[])
     g_test_add_func("/keyval/visit/dict", test_keyval_visit_dict);
     g_test_add_func("/keyval/visit/list", test_keyval_visit_list);
     g_test_add_func("/keyval/visit/optional", test_keyval_visit_optional);
+    g_test_add_func("/keyval/visit/alternate", test_keyval_visit_alternate);
+    g_test_add_func("/keyval/visit/any", test_keyval_visit_any);
     g_test_run();
     return 0;
 }
diff --git a/tests/test-opts-visitor.c b/tests/test-opts-visitor.c
index 2238f8efe5..23e897061c 100644
--- a/tests/test-opts-visitor.c
+++ b/tests/test-opts-visitor.c
@@ -175,6 +175,7 @@ expect_u64_max(OptsVisitorFixture *f, gconstpointer test_data)
 static void
 test_opts_range_unvisited(void)
 {
+    Error *err = NULL;
     intList *list = NULL;
     intList *tail;
     QemuOpts *opts;
@@ -199,10 +200,11 @@ test_opts_range_unvisited(void)
     g_assert_cmpint(tail->value, ==, 1);
     tail = (intList *)visit_next_list(v, (GenericList *)tail, sizeof(*list));
     g_assert(tail);
-    visit_check_list(v, &error_abort); /* BUG: unvisited tail not reported */
+    visit_check_list(v, &error_abort); /* unvisited tail ignored until... */
     visit_end_list(v, (void **)&list);
 
-    visit_check_struct(v, &error_abort);
+    visit_check_struct(v, &err); /* ...here */
+    error_free_or_abort(&err);
     visit_end_struct(v, NULL);
 
     qapi_free_intList(list);
@@ -247,6 +249,25 @@ test_opts_range_beyond(void)
     qemu_opts_del(opts);
 }
 
+static void
+test_opts_dict_unvisited(void)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+    Visitor *v;
+    UserDefOptions *userdef;
+
+    opts = qemu_opts_parse(qemu_find_opts("userdef"), "i64x=0,bogus=1", false,
+                           &error_abort);
+
+    v = opts_visitor_new(opts);
+    visit_type_UserDefOptions(v, NULL, &userdef, &err);
+    error_free_or_abort(&err);
+    visit_free(v);
+    qemu_opts_del(opts);
+    g_assert(!userdef);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -343,6 +364,8 @@ main(int argc, char **argv)
     g_test_add_func("/visitor/opts/range/beyond",
                     test_opts_range_beyond);
 
+    g_test_add_func("/visitor/opts/dict/unvisited", test_opts_dict_unvisited);
+
     g_test_run();
     return 0;
 }
diff --git a/tests/test-qobject-input-visitor.c b/tests/test-qobject-input-visitor.c
index 6eb48fee7b..f965743b6e 100644
--- a/tests/test-qobject-input-visitor.c
+++ b/tests/test-qobject-input-visitor.c
@@ -116,6 +116,34 @@ static void test_visitor_in_int(TestInputVisitorData *data,
     g_assert_cmpint(res, ==, value);
 }
 
+static void test_visitor_in_uint(TestInputVisitorData *data,
+                                const void *unused)
+{
+    Error *err = NULL;
+    uint64_t res = 0;
+    int value = 42;
+    Visitor *v;
+
+    v = visitor_input_test_init(data, "%d", value);
+
+    visit_type_uint64(v, NULL, &res, &error_abort);
+    g_assert_cmpuint(res, ==, (uint64_t)value);
+
+    /* BUG: value between INT64_MIN and -1 accepted modulo 2^64 */
+
+    v = visitor_input_test_init(data, "%d", -value);
+
+    visit_type_uint64(v, NULL, &res, &error_abort);
+    g_assert_cmpuint(res, ==, (uint64_t)-value);
+
+    /* BUG: value between INT64_MAX+1 and UINT64_MAX rejected */
+
+    v = visitor_input_test_init(data, "18446744073709551574");
+
+    visit_type_uint64(v, NULL, &res, &err);
+    error_free_or_abort(&err);
+}
+
 static void test_visitor_in_int_overflow(TestInputVisitorData *data,
                                          const void *unused)
 {
@@ -1225,6 +1253,8 @@ int main(int argc, char **argv)
 
     input_visitor_test_add("/visitor/input/int",
                            NULL, test_visitor_in_int);
+    input_visitor_test_add("/visitor/input/uint",
+                           NULL, test_visitor_in_uint);
     input_visitor_test_add("/visitor/input/int_overflow",
                            NULL, test_visitor_in_int_overflow);
     input_visitor_test_add("/visitor/input/int_keyval",
diff --git a/tests/test-replication.c b/tests/test-replication.c
index fac2da3f58..3016c6f2e0 100644
--- a/tests/test-replication.c
+++ b/tests/test-replication.c
@@ -144,18 +144,18 @@ static void prepare_imgs(void)
 
     /* Primary */
     bdrv_img_create(p_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE,
-                    BDRV_O_RDWR, &local_err, true);
+                    BDRV_O_RDWR, true, &local_err);
     g_assert(!local_err);
 
     /* Secondary */
     bdrv_img_create(s_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE,
-                    BDRV_O_RDWR, &local_err, true);
+                    BDRV_O_RDWR, true, &local_err);
     g_assert(!local_err);
     bdrv_img_create(s_active_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE,
-                    BDRV_O_RDWR, &local_err, true);
+                    BDRV_O_RDWR, true, &local_err);
     g_assert(!local_err);
     bdrv_img_create(s_hidden_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE,
-                    BDRV_O_RDWR, &local_err, true);
+                    BDRV_O_RDWR, true, &local_err);
     g_assert(!local_err);
 }
 
diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c
index 6db850bc89..79313a7f7a 100644
--- a/tests/test-string-input-visitor.c
+++ b/tests/test-string-input-visitor.c
@@ -63,6 +63,11 @@ static void test_visitor_in_int(TestInputVisitorData *data,
 
     visit_type_int(v, NULL, &res, &err);
     error_free_or_abort(&err);
+
+    v = visitor_input_test_init(data, "");
+
+    visit_type_int(v, NULL, &res, &err);
+    error_free_or_abort(&err);
 }
 
 static void check_ilist(Visitor *v, int64_t *expected, size_t n)
@@ -140,11 +145,11 @@ static void test_visitor_in_intList(TestInputVisitorData *data,
     v = visitor_input_test_init(data, "18446744073709551615");
     check_ulist(v, expect4, ARRAY_SIZE(expect4));
 
-    /* Empty list is invalid (weird) */
+    /* Empty list */
 
     v = visitor_input_test_init(data, "");
-    visit_type_int64List(v, NULL, &res, &err);
-    error_free_or_abort(&err);
+    visit_type_int64List(v, NULL, &res, &error_abort);
+    g_assert(!res);
 
     /* Not a list */
 
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
index bd7c501b2e..a9201b1fea 100644
--- a/tests/test-throttle.c
+++ b/tests/test-throttle.c
@@ -205,8 +205,8 @@ static void test_config_functions(void)
     orig_cfg.buckets[THROTTLE_OPS_READ].avg  = 69;
     orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23;
 
-    orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */
-    orig_cfg.buckets[THROTTLE_BPS_READ].max  = 1; /* should not be corrected */
+    orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0;
+    orig_cfg.buckets[THROTTLE_BPS_READ].max  = 56;
     orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120;
 
     orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150;
@@ -246,8 +246,8 @@ static void test_config_functions(void)
     g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg  == 69);
     g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23);
 
-    g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */
-    g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max  == 1);   /* not fixed */
+    g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 0);
+    g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max  == 56);
     g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120);
 
     g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150);
diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c
index 43a1ad813f..ad33d96387 100644
--- a/tests/virtio-9p-test.c
+++ b/tests/virtio-9p-test.c
@@ -256,8 +256,8 @@ static void v9fs_req_recv(P9Req *req, uint8_t id)
         qvirtio_wait_queue_isr(v9p->dev, v9p->vq, 1000 * 1000);
 
         v9fs_memread(req, &hdr, 7);
-        le32_to_cpus(&hdr.size);
-        le16_to_cpus(&hdr.tag);
+        hdr.size = ldl_le_p(&hdr.size);
+        hdr.tag = lduw_le_p(&hdr.tag);
         if (hdr.size >= 7) {
             break;
         }
diff --git a/trace/Makefile.objs b/trace/Makefile.objs
index 1b8eb4a616..afd571c3ec 100644
--- a/trace/Makefile.objs
+++ b/trace/Makefile.objs
@@ -9,27 +9,27 @@ $(BUILD_DIR)/trace-events-all: $(trace-events-files)
 
 $(obj)/generated-helpers-wrappers.h: $(obj)/generated-helpers-wrappers.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers-wrappers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers-wrappers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
+		--group=root \
 		--format=tcg-helper-wrapper-h \
 		--backend=$(TRACE_BACKENDS) \
 		$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-helpers.h: $(obj)/generated-helpers.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
+		--group=root \
 		--format=tcg-helper-h \
 		--backend=$(TRACE_BACKENDS) \
 		$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-helpers.c: $(obj)/generated-helpers.c-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-helpers.c-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-helpers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
+		--group=root \
 		--format=tcg-helper-c \
 		--backend=$(TRACE_BACKENDS) \
 		$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
@@ -41,9 +41,9 @@ target-obj-y += generated-helpers.o
 
 $(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
-$(obj)/generated-tcg-tracers.h-timestamp: $(trace-events-files) $(BUILD_DIR)/config-host.mak $(tracetool-y)
+$(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
 	$(call quiet-command,$(TRACETOOL) \
-		--group=all \
+		--group=root \
 		--format=tcg-h \
 		--backend=$(TRACE_BACKENDS) \
 		$< > $@,"GEN","$(patsubst %-timestamp,%,$@)")
diff --git a/ui/cocoa.m b/ui/cocoa.m
index c81f7b6183..207555edf7 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -45,7 +45,36 @@
 #ifndef MAC_OS_X_VERSION_10_10
 #define MAC_OS_X_VERSION_10_10 101000
 #endif
+#ifndef MAC_OS_X_VERSION_10_12
+#define MAC_OS_X_VERSION_10_12 101200
+#endif
 
+/* macOS 10.12 deprecated many constants, #define the new names for older SDKs */
+#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_12
+#define NSEventMaskAny                  NSAnyEventMask
+#define NSEventModifierFlagCommand      NSCommandKeyMask
+#define NSEventModifierFlagControl      NSControlKeyMask
+#define NSEventModifierFlagOption       NSAlternateKeyMask
+#define NSEventTypeFlagsChanged         NSFlagsChanged
+#define NSEventTypeKeyUp                NSKeyUp
+#define NSEventTypeKeyDown              NSKeyDown
+#define NSEventTypeMouseMoved           NSMouseMoved
+#define NSEventTypeLeftMouseDown        NSLeftMouseDown
+#define NSEventTypeRightMouseDown       NSRightMouseDown
+#define NSEventTypeOtherMouseDown       NSOtherMouseDown
+#define NSEventTypeLeftMouseDragged     NSLeftMouseDragged
+#define NSEventTypeRightMouseDragged    NSRightMouseDragged
+#define NSEventTypeOtherMouseDragged    NSOtherMouseDragged
+#define NSEventTypeLeftMouseUp          NSLeftMouseUp
+#define NSEventTypeRightMouseUp         NSRightMouseUp
+#define NSEventTypeOtherMouseUp         NSOtherMouseUp
+#define NSEventTypeScrollWheel          NSScrollWheel
+#define NSTextAlignmentCenter           NSCenterTextAlignment
+#define NSWindowStyleMaskBorderless     NSBorderlessWindowMask
+#define NSWindowStyleMaskClosable       NSClosableWindowMask
+#define NSWindowStyleMaskMiniaturizable NSMiniaturizableWindowMask
+#define NSWindowStyleMaskTitled         NSTitledWindowMask
+#endif
 
 //#define DEBUG
 
@@ -494,7 +523,7 @@ QemuCocoaView *cocoaView;
         } else {
             [NSMenu setMenuBarVisible:NO];
             fullScreenWindow = [[NSWindow alloc] initWithContentRect:[[NSScreen mainScreen] frame]
-                styleMask:NSBorderlessWindowMask
+                styleMask:NSWindowStyleMaskBorderless
                 backing:NSBackingStoreBuffered
                 defer:NO];
             [fullScreenWindow setAcceptsMouseMovedEvents: YES];
@@ -517,7 +546,7 @@ QemuCocoaView *cocoaView;
     NSPoint p = [event locationInWindow];
 
     switch ([event type]) {
-        case NSFlagsChanged:
+        case NSEventTypeFlagsChanged:
             keycode = cocoa_keycode_to_qemu([event keyCode]);
 
             if ((keycode == Q_KEY_CODE_META_L || keycode == Q_KEY_CODE_META_R)
@@ -544,15 +573,15 @@ QemuCocoaView *cocoaView;
             }
 
             // release Mouse grab when pressing ctrl+alt
-            if (([event modifierFlags] & NSControlKeyMask) && ([event modifierFlags] & NSAlternateKeyMask)) {
+            if (([event modifierFlags] & NSEventModifierFlagControl) && ([event modifierFlags] & NSEventModifierFlagOption)) {
                 [self ungrabMouse];
             }
             break;
-        case NSKeyDown:
+        case NSEventTypeKeyDown:
             keycode = cocoa_keycode_to_qemu([event keyCode]);
 
             // forward command key combos to the host UI unless the mouse is grabbed
-            if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) {
+            if (!isMouseGrabbed && ([event modifierFlags] & NSEventModifierFlagCommand)) {
                 [NSApp sendEvent:event];
                 return;
             }
@@ -560,7 +589,7 @@ QemuCocoaView *cocoaView;
             // default
 
             // handle control + alt Key Combos (ctrl+alt is reserved for QEMU)
-            if (([event modifierFlags] & NSControlKeyMask) && ([event modifierFlags] & NSAlternateKeyMask)) {
+            if (([event modifierFlags] & NSEventModifierFlagControl) && ([event modifierFlags] & NSEventModifierFlagOption)) {
                 switch (keycode) {
 
                     // enable graphic console
@@ -609,12 +638,12 @@ QemuCocoaView *cocoaView;
                     kbd_put_keysym(keysym);
             }
             break;
-        case NSKeyUp:
+        case NSEventTypeKeyUp:
             keycode = cocoa_keycode_to_qemu([event keyCode]);
 
             // don't pass the guest a spurious key-up if we treated this
             // command-key combo as a host UI action
-            if (!isMouseGrabbed && ([event modifierFlags] & NSCommandKeyMask)) {
+            if (!isMouseGrabbed && ([event modifierFlags] & NSEventModifierFlagCommand)) {
                 return;
             }
 
@@ -622,7 +651,7 @@ QemuCocoaView *cocoaView;
                 qemu_input_event_send_key_qcode(dcl->con, keycode, false);
             }
             break;
-        case NSMouseMoved:
+        case NSEventTypeMouseMoved:
             if (isAbsoluteEnabled) {
                 if (![self screenContainsPoint:p] || ![[self window] isKeyWindow]) {
                     if (isMouseGrabbed) {
@@ -636,39 +665,39 @@ QemuCocoaView *cocoaView;
             }
             mouse_event = true;
             break;
-        case NSLeftMouseDown:
-            if ([event modifierFlags] & NSCommandKeyMask) {
+        case NSEventTypeLeftMouseDown:
+            if ([event modifierFlags] & NSEventModifierFlagCommand) {
                 buttons |= MOUSE_EVENT_RBUTTON;
             } else {
                 buttons |= MOUSE_EVENT_LBUTTON;
             }
             mouse_event = true;
             break;
-        case NSRightMouseDown:
+        case NSEventTypeRightMouseDown:
             buttons |= MOUSE_EVENT_RBUTTON;
             mouse_event = true;
             break;
-        case NSOtherMouseDown:
+        case NSEventTypeOtherMouseDown:
             buttons |= MOUSE_EVENT_MBUTTON;
             mouse_event = true;
             break;
-        case NSLeftMouseDragged:
-            if ([event modifierFlags] & NSCommandKeyMask) {
+        case NSEventTypeLeftMouseDragged:
+            if ([event modifierFlags] & NSEventModifierFlagCommand) {
                 buttons |= MOUSE_EVENT_RBUTTON;
             } else {
                 buttons |= MOUSE_EVENT_LBUTTON;
             }
             mouse_event = true;
             break;
-        case NSRightMouseDragged:
+        case NSEventTypeRightMouseDragged:
             buttons |= MOUSE_EVENT_RBUTTON;
             mouse_event = true;
             break;
-        case NSOtherMouseDragged:
+        case NSEventTypeOtherMouseDragged:
             buttons |= MOUSE_EVENT_MBUTTON;
             mouse_event = true;
             break;
-        case NSLeftMouseUp:
+        case NSEventTypeLeftMouseUp:
             mouse_event = true;
             if (!isMouseGrabbed && [self screenContainsPoint:p]) {
                 if([[self window] isKeyWindow]) {
@@ -676,13 +705,13 @@ QemuCocoaView *cocoaView;
                 }
             }
             break;
-        case NSRightMouseUp:
+        case NSEventTypeRightMouseUp:
             mouse_event = true;
             break;
-        case NSOtherMouseUp:
+        case NSEventTypeOtherMouseUp:
             mouse_event = true;
             break;
-        case NSScrollWheel:
+        case NSEventTypeScrollWheel:
             if (isMouseGrabbed) {
                 buttons |= ([event deltaY] < 0) ?
                     MOUSE_EVENT_WHEELUP : MOUSE_EVENT_WHEELDN;
@@ -847,7 +876,7 @@ QemuCocoaView *cocoaView;
 
         // create a window
         normalWindow = [[NSWindow alloc] initWithContentRect:[cocoaView frame]
-            styleMask:NSTitledWindowMask|NSMiniaturizableWindowMask|NSClosableWindowMask
+            styleMask:NSWindowStyleMaskTitled|NSWindowStyleMaskMiniaturizable|NSWindowStyleMaskClosable
             backing:NSBackingStoreBuffered defer:NO];
         if(!normalWindow) {
             fprintf(stderr, "(cocoa) can't create window\n");
@@ -1152,8 +1181,8 @@ QemuCocoaView *cocoaView;
     int x = 0, y = 0, about_width = 400, about_height = 200;
     NSRect window_rect = NSMakeRect(x, y, about_width, about_height);
     about_window = [[NSWindow alloc] initWithContentRect:window_rect
-                    styleMask:NSTitledWindowMask | NSClosableWindowMask |
-                    NSMiniaturizableWindowMask
+                    styleMask:NSWindowStyleMaskTitled | NSWindowStyleMaskClosable |
+                    NSWindowStyleMaskMiniaturizable
                     backing:NSBackingStoreBuffered
                     defer:NO];
     [about_window setTitle: @"About"];
@@ -1192,7 +1221,7 @@ QemuCocoaView *cocoaView;
     [name_label setEditable: NO];
     [name_label setBezeled: NO];
     [name_label setDrawsBackground: NO];
-    [name_label setAlignment: NSCenterTextAlignment];
+    [name_label setAlignment: NSTextAlignmentCenter];
     NSString *qemu_name = [[NSString alloc] initWithCString: gArgv[0]
                                             encoding: NSASCIIStringEncoding];
     qemu_name = [qemu_name lastPathComponent];
@@ -1208,7 +1237,7 @@ QemuCocoaView *cocoaView;
                                                       version_rect];
     [version_label setEditable: NO];
     [version_label setBezeled: NO];
-    [version_label setAlignment: NSCenterTextAlignment];
+    [version_label setAlignment: NSTextAlignmentCenter];
     [version_label setDrawsBackground: NO];
 
     /* Create the version string*/
@@ -1228,7 +1257,7 @@ QemuCocoaView *cocoaView;
     [copyright_label setEditable: NO];
     [copyright_label setBezeled: NO];
     [copyright_label setDrawsBackground: NO];
-    [copyright_label setAlignment: NSCenterTextAlignment];
+    [copyright_label setAlignment: NSTextAlignmentCenter];
     [copyright_label setStringValue: [NSString stringWithFormat: @"%s",
                                      QEMU_COPYRIGHT]];
     [superView addSubview: copyright_label];
@@ -1285,7 +1314,7 @@ int main (int argc, const char * argv[]) {
     [menu addItem:[NSMenuItem separatorItem]]; //Separator
     [menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) keyEquivalent:@"h"]; //Hide QEMU
     menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others
-    [menuItem setKeyEquivalentModifierMask:(NSAlternateKeyMask|NSCommandKeyMask)];
+    [menuItem setKeyEquivalentModifierMask:(NSEventModifierFlagOption|NSEventModifierFlagCommand)];
     [menu addItemWithTitle:@"Show All" action:@selector(unhideAllApplications:) keyEquivalent:@""]; // Show All
     [menu addItem:[NSMenuItem separatorItem]]; //Separator
     [menu addItemWithTitle:@"Quit QEMU" action:@selector(terminate:) keyEquivalent:@"q"];
@@ -1399,7 +1428,7 @@ static void cocoa_refresh(DisplayChangeListener *dcl)
     NSEvent *event;
     distantPast = [NSDate distantPast];
     do {
-        event = [NSApp nextEventMatchingMask:NSAnyEventMask untilDate:distantPast
+        event = [NSApp nextEventMatchingMask:NSEventMaskAny untilDate:distantPast
                         inMode: NSDefaultRunLoopMode dequeue:YES];
         if (event != nil) {
             [cocoaView handleEvent:event];
diff --git a/ui/console.c b/ui/console.c
index 937c950840..189eecfd29 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1410,6 +1410,8 @@ void register_displaychangelistener(DisplayChangeListener *dcl)
     static DisplaySurface *dummy;
     QemuConsole *con;
 
+    assert(!dcl->ds);
+
     if (dcl->ops->dpy_gl_ctx_create) {
         /* display has opengl support */
         assert(dcl->con);
@@ -1538,6 +1540,8 @@ void dpy_gfx_replace_surface(QemuConsole *con,
     DisplaySurface *old_surface = con->surface;
     DisplayChangeListener *dcl;
 
+    assert(old_surface != surface);
+
     con->surface = surface;
     QLIST_FOREACH(dcl, &s->listeners, next) {
         if (con != (dcl->con ? dcl->con : active_console)) {
@@ -1576,17 +1580,22 @@ bool dpy_gfx_check_format(QemuConsole *con,
 }
 
 /*
- * Safe DPY refresh for TCG guests. This runs when the TCG vCPUs are
- * quiescent so we can avoid races between dirty page tracking for
- * direct frame-buffer access by the guest.
+ * Safe DPY refresh for TCG guests. We use the exclusive mechanism to
+ * ensure the TCG vCPUs are quiescent so we can avoid races between
+ * dirty page tracking for direct frame-buffer access by the guest.
  *
  * This is a temporary stopgap until we've fixed the dirty tracking
  * races in display adapters.
  */
-static void do_safe_dpy_refresh(CPUState *cpu, run_on_cpu_data opaque)
+static void do_safe_dpy_refresh(DisplayChangeListener *dcl)
 {
-    DisplayChangeListener *dcl = opaque.host_ptr;
+    qemu_mutex_unlock_iothread();
+    start_exclusive();
+    qemu_mutex_lock_iothread();
     dcl->ops->dpy_refresh(dcl);
+    qemu_mutex_unlock_iothread();
+    end_exclusive();
+    qemu_mutex_lock_iothread();
 }
 
 static void dpy_refresh(DisplayState *s)
@@ -1596,8 +1605,7 @@ static void dpy_refresh(DisplayState *s)
     QLIST_FOREACH(dcl, &s->listeners, next) {
         if (dcl->ops->dpy_refresh) {
             if (tcg_enabled()) {
-                async_safe_run_on_cpu(first_cpu, do_safe_dpy_refresh,
-                                      RUN_ON_CPU_HOST_PTR(dcl));
+                do_safe_dpy_refresh(dcl);
             } else {
                 dcl->ops->dpy_refresh(dcl);
             }
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 584dd1b04d..b7b6b2e3cc 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -192,6 +192,56 @@ EGLSurface qemu_egl_init_surface_x11(EGLContext ectx, Window win)
 
 /* ---------------------------------------------------------------------- */
 
+/*
+ * Taken from glamor_egl.h from the Xorg xserver, which is MIT licensed
+ *
+ * Create an EGLDisplay from a native display type. This is a little quirky
+ * for a few reasons.
+ *
+ * 1: GetPlatformDisplayEXT and GetPlatformDisplay are the API you want to
+ * use, but have different function signatures in the third argument; this
+ * happens not to matter for us, at the moment, but it means epoxy won't alias
+ * them together.
+ *
+ * 2: epoxy 1.3 and earlier don't understand EGL client extensions, which
+ * means you can't call "eglGetPlatformDisplayEXT" directly, as the resolver
+ * will crash.
+ *
+ * 3: You can't tell whether you have EGL 1.5 at this point, because
+ * eglQueryString(EGL_VERSION) is a property of the display, which we don't
+ * have yet. So you have to query for extensions no matter what. Fortunately
+ * epoxy_has_egl_extension _does_ let you query for client extensions, so
+ * we don't have to write our own extension string parsing.
+ *
+ * 4. There is no EGL_KHR_platform_base to complement the EXT one, thus one
+ * needs to know EGL 1.5 is supported in order to use the eglGetPlatformDisplay
+ * function pointer.
+ * We can workaround this (circular dependency) by probing for the EGL 1.5
+ * platform extensions (EGL_KHR_platform_gbm and friends) yet it doesn't seem
+ * like mesa will be able to advertise these (even though it can do EGL 1.5).
+ */
+static EGLDisplay qemu_egl_get_display(void *native)
+{
+    EGLDisplay dpy = EGL_NO_DISPLAY;
+
+#ifdef EGL_MESA_platform_gbm
+    /* In practise any EGL 1.5 implementation would support the EXT extension */
+    if (epoxy_has_egl_extension(NULL, "EGL_EXT_platform_base")) {
+        PFNEGLGETPLATFORMDISPLAYEXTPROC getPlatformDisplayEXT =
+            (void *) eglGetProcAddress("eglGetPlatformDisplayEXT");
+        if (getPlatformDisplayEXT) {
+            dpy = getPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, native, NULL);
+        }
+    }
+#endif
+
+    if (dpy == EGL_NO_DISPLAY) {
+        /* fallback */
+        dpy = eglGetDisplay(native);
+    }
+    return dpy;
+}
+
 int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug)
 {
     static const EGLint conf_att_gl[] = {
@@ -222,12 +272,8 @@ int qemu_egl_init_dpy(EGLNativeDisplayType dpy, bool gles, bool debug)
         setenv("LIBGL_DEBUG", "verbose", true);
     }
 
-    egl_dbg("eglGetDisplay (dpy %p) ...\n", dpy);
-#ifdef EGL_MESA_platform_gbm
-    qemu_egl_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_GBM_MESA, dpy, NULL);
-#else
-    qemu_egl_display = eglGetDisplay(dpy);
-#endif
+    egl_dbg("qemu_egl_get_display (dpy %p) ...\n", dpy);
+    qemu_egl_display = qemu_egl_get_display(dpy);
     if (qemu_egl_display == EGL_NO_DISPLAY) {
         error_report("egl: eglGetDisplay failed");
         return -1;
diff --git a/ui/input-linux.c b/ui/input-linux.c
index ac31f47719..dc0613ca1f 100644
--- a/ui/input-linux.c
+++ b/ui/input-linux.c
@@ -169,6 +169,8 @@ struct InputLinux {
     bool        has_abs_x;
     int         num_keys;
     int         num_btns;
+    struct input_event event;
+    int         read_offset;
 
     QTAILQ_ENTRY(InputLinux) next;
 };
@@ -327,25 +329,30 @@ static void input_linux_handle_mouse(InputLinux *il, struct input_event *event)
 static void input_linux_event(void *opaque)
 {
     InputLinux *il = opaque;
-    struct input_event event;
     int rc;
+    int read_size;
+    uint8_t *p = (uint8_t *)&il->event;
 
     for (;;) {
-        rc = read(il->fd, &event, sizeof(event));
-        if (rc != sizeof(event)) {
+        read_size = sizeof(il->event) - il->read_offset;
+        rc = read(il->fd, &p[il->read_offset], read_size);
+        if (rc != read_size) {
             if (rc < 0 && errno != EAGAIN) {
                 fprintf(stderr, "%s: read: %s\n", __func__, strerror(errno));
                 qemu_set_fd_handler(il->fd, NULL, NULL, NULL);
                 close(il->fd);
+            } else if (rc > 0) {
+                il->read_offset += rc;
             }
             break;
         }
+        il->read_offset = 0;
 
         if (il->num_keys) {
-            input_linux_handle_keyboard(il, &event);
+            input_linux_handle_keyboard(il, &il->event);
         }
         if (il->has_rel_x && il->num_btns) {
-            input_linux_handle_mouse(il, &event);
+            input_linux_handle_mouse(il, &il->event);
         }
     }
 }
diff --git a/ui/vnc.c b/ui/vnc.c
index 6e93b883b5..349cfc9d86 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -129,10 +129,13 @@ static void vnc_init_basic_info(SocketAddress *addr,
         info->family = NETWORK_ADDRESS_FAMILY_UNIX;
         break;
 
-    default:
-        error_setg(errp, "Unsupported socket kind %d",
-                   addr->type);
+    case SOCKET_ADDRESS_KIND_VSOCK:
+    case SOCKET_ADDRESS_KIND_FD:
+        error_setg(errp, "Unsupported socket address type %s",
+                   SocketAddressKind_lookup[addr->type]);
         break;
+    default:
+        abort();
     }
 
     return;
@@ -411,10 +414,13 @@ VncInfo *qmp_query_vnc(Error **errp)
             info->family = NETWORK_ADDRESS_FAMILY_UNIX;
             break;
 
-        default:
-            error_setg(errp, "Unsupported socket kind %d",
-                       addr->type);
+        case SOCKET_ADDRESS_KIND_VSOCK:
+        case SOCKET_ADDRESS_KIND_FD:
+            error_setg(errp, "Unsupported socket address type %s",
+                       SocketAddressKind_lookup[addr->type]);
             goto out_error;
+        default:
+            abort();
         }
 
         info->has_host = true;
@@ -3401,6 +3407,7 @@ vnc_display_create_creds(bool x509,
 
 static int vnc_display_get_address(const char *addrstr,
                                    bool websocket,
+                                   bool reverse,
                                    int displaynum,
                                    int to,
                                    bool has_ipv4,
@@ -3480,21 +3487,22 @@ static int vnc_display_get_address(const char *addrstr,
                 inet->port = g_strdup(port);
             }
         } else {
+            int offset = reverse ? 0 : 5900;
             if (parse_uint_full(port, &baseport, 10) < 0) {
                 error_setg(errp, "can't convert to a number: %s", port);
                 goto cleanup;
             }
             if (baseport > 65535 ||
-                baseport + 5900 > 65535) {
+                baseport + offset > 65535) {
                 error_setg(errp, "port %s out of range", port);
                 goto cleanup;
             }
             inet->port = g_strdup_printf(
-                "%d", (int)baseport + 5900);
+                "%d", (int)baseport + offset);
 
             if (to) {
                 inet->has_to = true;
-                inet->to = to + 5900;
+                inet->to = to + offset;
             }
         }
 
@@ -3516,6 +3524,7 @@ static int vnc_display_get_address(const char *addrstr,
 }
 
 static int vnc_display_get_addresses(QemuOpts *opts,
+                                     bool reverse,
                                      SocketAddress ***retsaddr,
                                      size_t *retnsaddr,
                                      SocketAddress ***retwsaddr,
@@ -3555,7 +3564,7 @@ static int vnc_display_get_addresses(QemuOpts *opts,
     qemu_opt_iter_init(&addriter, opts, "vnc");
     while ((addr = qemu_opt_iter_next(&addriter)) != NULL) {
         int rv;
-        rv = vnc_display_get_address(addr, false, 0, to,
+        rv = vnc_display_get_address(addr, false, reverse, 0, to,
                                      has_ipv4, has_ipv6,
                                      ipv4, ipv6,
                                      &saddr, errp);
@@ -3580,7 +3589,7 @@ static int vnc_display_get_addresses(QemuOpts *opts,
 
     qemu_opt_iter_init(&addriter, opts, "websocket");
     while ((addr = qemu_opt_iter_next(&addriter)) != NULL) {
-        if (vnc_display_get_address(addr, true, displaynum, to,
+        if (vnc_display_get_address(addr, true, reverse, displaynum, to,
                                     has_ipv4, has_ipv6,
                                     ipv4, ipv6,
                                     &wsaddr, errp) < 0) {
@@ -3639,6 +3648,7 @@ static int vnc_display_connect(VncDisplay *vd,
         error_setg(errp, "Expected a single address in reverse mode");
         return -1;
     }
+    /* TODO SOCKET_ADDRESS_KIND_FD when fd has AF_UNIX */
     vd->is_unix = saddr[0]->type == SOCKET_ADDRESS_KIND_UNIX;
     sioc = qio_channel_socket_new();
     qio_channel_set_name(QIO_CHANNEL(sioc), "vnc-reverse");
@@ -3777,15 +3787,12 @@ void vnc_display_open(const char *id, Error **errp)
         return;
     }
 
-    if (vnc_display_get_addresses(opts, &saddr, &nsaddr,
+    reverse = qemu_opt_get_bool(opts, "reverse", false);
+    if (vnc_display_get_addresses(opts, reverse, &saddr, &nsaddr,
                                   &wsaddr, &nwsaddr, errp) < 0) {
         goto fail;
     }
 
-    if (saddr == NULL) {
-        return;
-    }
-
     password = qemu_opt_get_bool(opts, "password", false);
     if (password) {
         if (fips_get_state()) {
@@ -3803,7 +3810,6 @@ void vnc_display_open(const char *id, Error **errp)
         }
     }
 
-    reverse = qemu_opt_get_bool(opts, "reverse", false);
     lock_key_sync = qemu_opt_get_bool(opts, "lock-key-sync", true);
     key_delay_ms = qemu_opt_get_number(opts, "key-delay-ms", 1);
     sasl = qemu_opt_get_bool(opts, "sasl", false);
@@ -3971,6 +3977,10 @@ void vnc_display_open(const char *id, Error **errp)
         register_displaychangelistener(&vd->dcl);
     }
 
+    if (saddr == NULL) {
+        goto cleanup;
+    }
+
     if (reverse) {
         if (vnc_display_connect(vd, saddr, nsaddr, wsaddr, nwsaddr, errp) < 0) {
             goto fail;
diff --git a/user-exec.c b/user-exec.c
index 6db075884d..a8f95fa1e1 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -57,10 +57,23 @@ static void cpu_exit_tb_from_sighandler(CPUState *cpu, sigset_t *old_set)
 static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
                                     int is_write, sigset_t *old_set)
 {
-    CPUState *cpu;
+    CPUState *cpu = current_cpu;
     CPUClass *cc;
     int ret;
 
+    /* For synchronous signals we expect to be coming from the vCPU
+     * thread (so current_cpu should be valid) and either from running
+     * code or during translation which can fault as we cross pages.
+     *
+     * If neither is true then something has gone wrong and we should
+     * abort rather than try and restart the vCPU execution.
+     */
+    if (!cpu || !cpu->running) {
+        printf("qemu:%s received signal outside vCPU context @ pc=0x%"
+               PRIxPTR "\n",  __func__, pc);
+        abort();
+    }
+
 #if defined(DEBUG_SIGNAL)
     printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
            pc, address, is_write, *(unsigned long *)old_set);
@@ -83,7 +96,7 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
              * currently executing TB was modified and must be exited
              * immediately.
              */
-            cpu_exit_tb_from_sighandler(current_cpu, old_set);
+            cpu_exit_tb_from_sighandler(cpu, old_set);
             g_assert_not_reached();
         default:
             g_assert_not_reached();
@@ -94,7 +107,6 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
        are still valid segv ones */
     address = h2g_nocheck(address);
 
-    cpu = current_cpu;
     cc = CPU_GET_CLASS(cpu);
     /* see if it is an MMU fault */
     g_assert(cc->handle_mmu_fault);
diff --git a/util/async.c b/util/async.c
index 663e297e1f..355af73ee7 100644
--- a/util/async.c
+++ b/util/async.c
@@ -453,6 +453,11 @@ void aio_co_wake(struct Coroutine *co)
     smp_read_barrier_depends();
     ctx = atomic_read(&co->ctx);
 
+    aio_co_enter(ctx, co);
+}
+
+void aio_co_enter(AioContext *ctx, struct Coroutine *co)
+{
     if (ctx != qemu_get_current_aio_context()) {
         aio_co_schedule(ctx, co);
         return;
@@ -464,7 +469,7 @@ void aio_co_wake(struct Coroutine *co)
         QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next);
     } else {
         aio_context_acquire(ctx);
-        qemu_coroutine_enter(co);
+        qemu_aio_coroutine_enter(ctx, co);
         aio_context_release(ctx);
     }
 }
diff --git a/util/bitmap.c b/util/bitmap.c
index c1a84ca5e3..efced9a7d8 100644
--- a/util/bitmap.c
+++ b/util/bitmap.c
@@ -287,6 +287,17 @@ bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
     return dirty != 0;
 }
 
+void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src,
+                                  long nr)
+{
+    while (nr > 0) {
+        *dst = atomic_xchg(src, 0);
+        dst++;
+        src++;
+        nr -= BITS_PER_LONG;
+    }
+}
+
 #define ALIGN_MASK(x,mask)      (((x)+(mask))&~(mask))
 
 /**
diff --git a/util/error.c b/util/error.c
index 9c40b1f458..020b86b9f0 100644
--- a/util/error.c
+++ b/util/error.c
@@ -134,6 +134,7 @@ void error_vprepend(Error **errp, const char *fmt, va_list ap)
     newmsg = g_string_new(NULL);
     g_string_vprintf(newmsg, fmt, ap);
     g_string_append(newmsg, (*errp)->msg);
+    g_free((*errp)->msg);
     (*errp)->msg = g_string_free(newmsg, 0);
 }
 
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 7e40252ade..acdbe3b483 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -81,8 +81,10 @@ void event_notifier_cleanup(EventNotifier *e)
 {
     if (e->rfd != e->wfd) {
         close(e->rfd);
+        e->rfd = -1;
     }
     close(e->wfd);
+    e->wfd = -1;
 }
 
 int event_notifier_get_fd(const EventNotifier *e)
diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c
index 519fb59123..62c53b0a99 100644
--- a/util/event_notifier-win32.c
+++ b/util/event_notifier-win32.c
@@ -25,6 +25,7 @@ int event_notifier_init(EventNotifier *e, int active)
 void event_notifier_cleanup(EventNotifier *e)
 {
     CloseHandle(e->event);
+    e->event = NULL;
 }
 
 HANDLE event_notifier_get_handle(EventNotifier *e)
diff --git a/util/keyval.c b/util/keyval.c
index f646b36821..93d5db6b59 100644
--- a/util/keyval.c
+++ b/util/keyval.c
@@ -21,22 +21,36 @@
  *
  * Semantics defined by reduction to JSON:
  *
- *   key-vals is a tree of objects and arrays rooted at object R
- *   where for each key-val = key-fragment . ... = val in key-vals
- *       R op key-fragment op ... = val'
- *       where (left-associative) op is
- *                 array subscript L[key-fragment] for numeric key-fragment
- *                 member reference L.key-fragment otherwise
- *             val' is val with ',,' replaced by ','
- *   and only R may be empty.
+ *   key-vals specifies a JSON object, i.e. a tree whose root is an
+ *   object, inner nodes other than the root are objects or arrays,
+ *   and leaves are strings.
  *
- *   Duplicate keys are permitted; all but the last one are ignored.
+ *   Each key-val = key-fragment '.' ... '=' val specifies a path from
+ *   root to a leaf (left of '='), and the leaf's value (right of
+ *   '=').
  *
- *   The equations must have a solution.  Counter-example: a.b=1,a=2
- *   doesn't have one, because R.a must be an object to satisfy a.b=1
- *   and a string to satisfy a=2.
+ *   A path from the root is defined recursively:
+ *       L '.' key-fragment is a child of the node denoted by path L
+ *       key-fragment is a child of the tree root
+ *   If key-fragment is numeric, the parent is an array and the child
+ *   is its key-fragment-th member, counting from zero.
+ *   Else, the parent is an object, and the child is its member named
+ *   key-fragment.
  *
- * Key-fragments must be valid QAPI names or consist only of digits.
+ *   This constrains inner nodes to be either array or object.  The
+ *   constraints must be satisfiable.  Counter-example: a.b=1,a=2 is
+ *   not, because root.a must be an object to satisfy a.b=1 and a
+ *   string to satisfy a=2.
+ *
+ *   Array subscripts can occur in any order, but the set of
+ *   subscripts must not have gaps.  For instance, a.1=v is not okay,
+ *   because root.a[0] is missing.
+ *
+ *   If multiple key-val denote the same leaf, the last one determines
+ *   the value.
+ *
+ * Key-fragments must be valid QAPI names or consist only of decimal
+ * digits.
  *
  * The length of any key-fragment must be between 1 and 127.
  *
@@ -47,6 +61,16 @@
  * "key absent" already means "optional object/array absent", which
  * isn't the same as "empty object/array present".
  *
+ * Design flaw: scalar values can only be strings; there is no way to
+ * denote numbers, true, false or null.  The special QObject input
+ * visitor returned by qobject_input_visitor_new_keyval() mostly hides
+ * this by automatically converting strings to the type the visitor
+ * expects.  Breaks down for alternate types and type 'any', where the
+ * visitor's expectation isn't clear.  Code visiting such types needs
+ * to do the conversion itself, but only when using this keyval
+ * visitor.  Awkward.  Alternate types without a string member don't
+ * work at all.
+ *
  * Additional syntax for use with an implied key:
  *
  *   key-vals-ik  = val-no-key [ ',' key-vals ]
@@ -64,8 +88,8 @@
 
 /*
  * Convert @key to a list index.
- * Convert all leading digits to a (non-negative) number, capped at
- * INT_MAX.
+ * Convert all leading decimal digits to a (non-negative) number,
+ * capped at INT_MAX.
  * If @end is non-null, assign a pointer to the first character after
  * the number to *@end.
  * Else, fail if any characters follow.
@@ -337,7 +361,8 @@ static QObject *keyval_listify(QDict *cur, GSList *key_of_cur, Error **errp)
     }
 
     /*
-     * Make a list from @elt[], reporting any missing elements.
+     * Make a list from @elt[], reporting the first missing element,
+     * if any.
      * If we dropped an index >= nelt in the previous loop, this loop
      * will run into the sentinel and report index @nelt missing.
      */
diff --git a/util/main-loop.c b/util/main-loop.c
index 4534c89308..19cad6b8b6 100644
--- a/util/main-loop.c
+++ b/util/main-loop.c
@@ -218,9 +218,12 @@ static void glib_pollfds_poll(void)
 
 static int os_host_main_loop_wait(int64_t timeout)
 {
+    GMainContext *context = g_main_context_default();
     int ret;
     static int spin_counter;
 
+    g_main_context_acquire(context);
+
     glib_pollfds_fill(&timeout);
 
     /* If the I/O thread is very busy or we are incorrectly busy waiting in
@@ -256,6 +259,9 @@ static int os_host_main_loop_wait(int64_t timeout)
     }
 
     glib_pollfds_poll();
+
+    g_main_context_release(context);
+
     return ret;
 }
 #else
@@ -412,12 +418,15 @@ static int os_host_main_loop_wait(int64_t timeout)
     fd_set rfds, wfds, xfds;
     int nfds;
 
+    g_main_context_acquire(context);
+
     /* XXX: need to suppress polling by better using win32 events */
     ret = 0;
     for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
         ret |= pe->func(pe->opaque);
     }
     if (ret != 0) {
+        g_main_context_release(context);
         return ret;
     }
 
@@ -472,6 +481,8 @@ static int os_host_main_loop_wait(int64_t timeout)
         g_main_context_dispatch(context);
     }
 
+    g_main_context_release(context);
+
     return select_ret || g_poll_ret;
 }
 #endif
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 3fe6089c3e..4d9189e9ef 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -55,7 +55,7 @@
 #include "qemu/error-report.h"
 #endif
 
-#define MAX_MEM_PREALLOC_THREAD_COUNT (MIN(sysconf(_SC_NPROCESSORS_ONLN), 16))
+#define MAX_MEM_PREALLOC_THREAD_COUNT 16
 
 struct MemsetThread {
     char *addr;
@@ -381,6 +381,18 @@ static void *do_touch_pages(void *arg)
     return NULL;
 }
 
+static inline int get_memset_num_threads(int smp_cpus)
+{
+    long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
+    int ret = 1;
+
+    if (host_procs > 0) {
+        ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), smp_cpus);
+    }
+    /* In case sysconf() fails, we fall back to single threaded */
+    return ret;
+}
+
 static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                             int smp_cpus)
 {
@@ -389,7 +401,7 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
     int i = 0;
 
     memset_thread_failed = false;
-    memset_num_threads = MIN(smp_cpus, MAX_MEM_PREALLOC_THREAD_COUNT);
+    memset_num_threads = get_memset_num_threads(smp_cpus);
     memset_thread = g_new0(MemsetThread, memset_num_threads);
     numpages_per_thread = (numpages / memset_num_threads);
     size_per_thread = (hpagesize * numpages_per_thread);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 72412e5649..486af9a622 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -102,12 +102,12 @@ static void coroutine_delete(Coroutine *co)
     qemu_coroutine_delete(co);
 }
 
-void qemu_coroutine_enter(Coroutine *co)
+void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co)
 {
     Coroutine *self = qemu_coroutine_self();
     CoroutineAction ret;
 
-    trace_qemu_coroutine_enter(self, co, co->entry_arg);
+    trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
 
     if (co->caller) {
         fprintf(stderr, "Co-routine re-entered recursively\n");
@@ -115,7 +115,7 @@ void qemu_coroutine_enter(Coroutine *co)
     }
 
     co->caller = self;
-    co->ctx = qemu_get_current_aio_context();
+    co->ctx = ctx;
 
     /* Store co->ctx before anything that stores co.  Matches
      * barrier in aio_co_wake and qemu_co_mutex_wake.
@@ -139,6 +139,11 @@ void qemu_coroutine_enter(Coroutine *co)
     }
 }
 
+void qemu_coroutine_enter(Coroutine *co)
+{
+    qemu_aio_coroutine_enter(qemu_get_current_aio_context(), co);
+}
+
 void qemu_coroutine_enter_if_inactive(Coroutine *co)
 {
     if (!qemu_coroutine_entered(co)) {
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 7c120c45ce..8188d9a8d7 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -25,6 +25,7 @@
 #include "qapi/error.h"
 #include "qemu/sockets.h"
 #include "qemu/main-loop.h"
+#include "qapi/clone-visitor.h"
 #include "qapi/qobject-input-visitor.h"
 #include "qapi/qobject-output-visitor.h"
 #include "qapi-visit.h"
@@ -426,8 +427,9 @@ static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr,
  * function succeeds, callback will be called when the connection
  * completes, with the file descriptor on success, or -1 on error.
  */
-int inet_connect_saddr(InetSocketAddress *saddr, Error **errp,
-                       NonBlockingConnectHandler *callback, void *opaque)
+int inet_connect_saddr(InetSocketAddress *saddr,
+                       NonBlockingConnectHandler *callback, void *opaque,
+                       Error **errp)
 {
     Error *local_err = NULL;
     struct addrinfo *res, *e;
@@ -658,7 +660,7 @@ int inet_connect(const char *str, Error **errp)
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        sock = inet_connect_saddr(addr, errp, NULL, NULL);
+        sock = inet_connect_saddr(addr, NULL, NULL, errp);
         qapi_free_InetSocketAddress(addr);
     }
     return sock;
@@ -726,9 +728,10 @@ static int vsock_connect_addr(const struct sockaddr_vm *svm, bool *in_progress,
     return sock;
 }
 
-static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp,
+static int vsock_connect_saddr(VsockSocketAddress *vaddr,
                                NonBlockingConnectHandler *callback,
-                               void *opaque)
+                               void *opaque,
+                               Error **errp)
 {
     struct sockaddr_vm svm;
     int sock = -1;
@@ -817,9 +820,9 @@ static void vsock_unsupported(Error **errp)
     error_setg(errp, "socket family AF_VSOCK unsupported");
 }
 
-static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp,
+static int vsock_connect_saddr(VsockSocketAddress *vaddr,
                                NonBlockingConnectHandler *callback,
-                               void *opaque)
+                               void *opaque, Error **errp)
 {
     vsock_unsupported(errp);
     return -1;
@@ -909,8 +912,9 @@ err:
     return -1;
 }
 
-static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp,
-                              NonBlockingConnectHandler *callback, void *opaque)
+static int unix_connect_saddr(UnixSocketAddress *saddr,
+                              NonBlockingConnectHandler *callback, void *opaque,
+                              Error **errp)
 {
     struct sockaddr_un un;
     ConnectState *connect_state = NULL;
@@ -977,8 +981,9 @@ static int unix_listen_saddr(UnixSocketAddress *saddr,
     return -1;
 }
 
-static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp,
-                              NonBlockingConnectHandler *callback, void *opaque)
+static int unix_connect_saddr(UnixSocketAddress *saddr,
+                              NonBlockingConnectHandler *callback, void *opaque,
+                              Error **errp)
 {
     error_setg(errp, "unix sockets are not available on windows");
     errno = ENOTSUP;
@@ -1024,7 +1029,7 @@ int unix_connect(const char *path, Error **errp)
 
     saddr = g_new0(UnixSocketAddress, 1);
     saddr->path = g_strdup(path);
-    sock = unix_connect_saddr(saddr, errp, NULL, NULL);
+    sock = unix_connect_saddr(saddr, NULL, NULL, errp);
     qapi_free_UnixSocketAddress(saddr);
     return sock;
 }
@@ -1073,18 +1078,18 @@ fail:
     return NULL;
 }
 
-int socket_connect(SocketAddress *addr, Error **errp,
-                   NonBlockingConnectHandler *callback, void *opaque)
+int socket_connect(SocketAddress *addr, NonBlockingConnectHandler *callback,
+                   void *opaque, Error **errp)
 {
     int fd;
 
     switch (addr->type) {
     case SOCKET_ADDRESS_KIND_INET:
-        fd = inet_connect_saddr(addr->u.inet.data, errp, callback, opaque);
+        fd = inet_connect_saddr(addr->u.inet.data, callback, opaque, errp);
         break;
 
     case SOCKET_ADDRESS_KIND_UNIX:
-        fd = unix_connect_saddr(addr->u.q_unix.data, errp, callback, opaque);
+        fd = unix_connect_saddr(addr->u.q_unix.data, callback, opaque, errp);
         break;
 
     case SOCKET_ADDRESS_KIND_FD:
@@ -1096,7 +1101,7 @@ int socket_connect(SocketAddress *addr, Error **errp,
         break;
 
     case SOCKET_ADDRESS_KIND_VSOCK:
-        fd = vsock_connect_saddr(addr->u.vsock.data, errp, callback, opaque);
+        fd = vsock_connect_saddr(addr->u.vsock.data, callback, opaque, errp);
         break;
 
     default:
@@ -1154,6 +1159,10 @@ int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
 {
     int fd;
 
+    /*
+     * TODO SOCKET_ADDRESS_KIND_FD when fd is AF_INET or AF_INET6
+     * (although other address families can do SOCK_DGRAM, too)
+     */
     switch (remote->type) {
     case SOCKET_ADDRESS_KIND_INET:
         fd = inet_dgram_saddr(remote->u.inet.data,
@@ -1307,19 +1316,14 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp)
 {
     char *buf;
     InetSocketAddress *inet;
-    char host_port[INET6_ADDRSTRLEN + 5 + 4];
 
     switch (addr->type) {
     case SOCKET_ADDRESS_KIND_INET:
         inet = addr->u.inet.data;
         if (strchr(inet->host, ':') == NULL) {
-            snprintf(host_port, sizeof(host_port), "%s:%s", inet->host,
-                    inet->port);
-            buf = g_strdup(host_port);
+            buf = g_strdup_printf("%s:%s", inet->host, inet->port);
         } else {
-            snprintf(host_port, sizeof(host_port), "[%s]:%s", inet->host,
-                    inet->port);
-            buf = g_strdup(host_port);
+            buf = g_strdup_printf("[%s]:%s", inet->host, inet->port);
         }
         break;
 
@@ -1338,9 +1342,38 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp)
         break;
 
     default:
-        error_setg(errp, "socket family %d unsupported",
-                   addr->type);
-        return NULL;
+        abort();
     }
     return buf;
 }
+
+SocketAddress *socket_address_crumple(SocketAddressFlat *addr_flat)
+{
+    SocketAddress *addr = g_new(SocketAddress, 1);
+
+    switch (addr_flat->type) {
+    case SOCKET_ADDRESS_FLAT_TYPE_INET:
+        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr->u.inet.data = QAPI_CLONE(InetSocketAddress,
+                                       &addr_flat->u.inet);
+        break;
+    case SOCKET_ADDRESS_FLAT_TYPE_UNIX:
+        addr->type = SOCKET_ADDRESS_KIND_UNIX;
+        addr->u.q_unix.data = QAPI_CLONE(UnixSocketAddress,
+                                         &addr_flat->u.q_unix);
+        break;
+    case SOCKET_ADDRESS_FLAT_TYPE_VSOCK:
+        addr->type = SOCKET_ADDRESS_KIND_VSOCK;
+        addr->u.vsock.data = QAPI_CLONE(VsockSocketAddress,
+                                        &addr_flat->u.vsock);
+        break;
+    case SOCKET_ADDRESS_FLAT_TYPE_FD:
+        addr->type = SOCKET_ADDRESS_KIND_FD;
+        addr->u.fd.data = QAPI_CLONE(String, &addr_flat->u.fd);
+        break;
+    default:
+        abort();
+    }
+
+    return addr;
+}
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 29c3e4dd85..59befd5202 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -10,6 +10,11 @@
  * See the COPYING file in the top-level directory.
  *
  */
+
+#ifndef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600
+#endif
+
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/thread.h"
@@ -39,44 +44,30 @@ static void error_exit(int err, const char *msg)
 
 void qemu_mutex_init(QemuMutex *mutex)
 {
-    mutex->owner = 0;
-    InitializeCriticalSection(&mutex->lock);
+    InitializeSRWLock(&mutex->lock);
 }
 
 void qemu_mutex_destroy(QemuMutex *mutex)
 {
-    assert(mutex->owner == 0);
-    DeleteCriticalSection(&mutex->lock);
+    InitializeSRWLock(&mutex->lock);
 }
 
 void qemu_mutex_lock(QemuMutex *mutex)
 {
-    EnterCriticalSection(&mutex->lock);
-
-    /* Win32 CRITICAL_SECTIONs are recursive.  Assert that we're not
-     * using them as such.
-     */
-    assert(mutex->owner == 0);
-    mutex->owner = GetCurrentThreadId();
+    AcquireSRWLockExclusive(&mutex->lock);
 }
 
 int qemu_mutex_trylock(QemuMutex *mutex)
 {
     int owned;
 
-    owned = TryEnterCriticalSection(&mutex->lock);
-    if (owned) {
-        assert(mutex->owner == 0);
-        mutex->owner = GetCurrentThreadId();
-    }
+    owned = TryAcquireSRWLockExclusive(&mutex->lock);
     return !owned;
 }
 
 void qemu_mutex_unlock(QemuMutex *mutex)
 {
-    assert(mutex->owner == GetCurrentThreadId());
-    mutex->owner = 0;
-    LeaveCriticalSection(&mutex->lock);
+    ReleaseSRWLockExclusive(&mutex->lock);
 }
 
 void qemu_rec_mutex_init(QemuRecMutex *mutex)
@@ -107,124 +98,27 @@ void qemu_rec_mutex_unlock(QemuRecMutex *mutex)
 void qemu_cond_init(QemuCond *cond)
 {
     memset(cond, 0, sizeof(*cond));
-
-    cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
-    if (!cond->sema) {
-        error_exit(GetLastError(), __func__);
-    }
-    cond->continue_event = CreateEvent(NULL,    /* security */
-                                       FALSE,   /* auto-reset */
-                                       FALSE,   /* not signaled */
-                                       NULL);   /* name */
-    if (!cond->continue_event) {
-        error_exit(GetLastError(), __func__);
-    }
+    InitializeConditionVariable(&cond->var);
 }
 
 void qemu_cond_destroy(QemuCond *cond)
 {
-    BOOL result;
-    result = CloseHandle(cond->continue_event);
-    if (!result) {
-        error_exit(GetLastError(), __func__);
-    }
-    cond->continue_event = 0;
-    result = CloseHandle(cond->sema);
-    if (!result) {
-        error_exit(GetLastError(), __func__);
-    }
-    cond->sema = 0;
+    InitializeConditionVariable(&cond->var);
 }
 
 void qemu_cond_signal(QemuCond *cond)
 {
-    DWORD result;
-
-    /*
-     * Signal only when there are waiters.  cond->waiters is
-     * incremented by pthread_cond_wait under the external lock,
-     * so we are safe about that.
-     */
-    if (cond->waiters == 0) {
-        return;
-    }
-
-    /*
-     * Waiting threads decrement it outside the external lock, but
-     * only if another thread is executing pthread_cond_broadcast and
-     * has the mutex.  So, it also cannot be decremented concurrently
-     * with this particular access.
-     */
-    cond->target = cond->waiters - 1;
-    result = SignalObjectAndWait(cond->sema, cond->continue_event,
-                                 INFINITE, FALSE);
-    if (result == WAIT_ABANDONED || result == WAIT_FAILED) {
-        error_exit(GetLastError(), __func__);
-    }
+    WakeConditionVariable(&cond->var);
 }
 
 void qemu_cond_broadcast(QemuCond *cond)
 {
-    BOOLEAN result;
-    /*
-     * As in pthread_cond_signal, access to cond->waiters and
-     * cond->target is locked via the external mutex.
-     */
-    if (cond->waiters == 0) {
-        return;
-    }
-
-    cond->target = 0;
-    result = ReleaseSemaphore(cond->sema, cond->waiters, NULL);
-    if (!result) {
-        error_exit(GetLastError(), __func__);
-    }
-
-    /*
-     * At this point all waiters continue. Each one takes its
-     * slice of the semaphore. Now it's our turn to wait: Since
-     * the external mutex is held, no thread can leave cond_wait,
-     * yet. For this reason, we can be sure that no thread gets
-     * a chance to eat *more* than one slice. OTOH, it means
-     * that the last waiter must send us a wake-up.
-     */
-    WaitForSingleObject(cond->continue_event, INFINITE);
+    WakeAllConditionVariable(&cond->var);
 }
 
 void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
 {
-    /*
-     * This access is protected under the mutex.
-     */
-    cond->waiters++;
-
-    /*
-     * Unlock external mutex and wait for signal.
-     * NOTE: we've held mutex locked long enough to increment
-     * waiters count above, so there's no problem with
-     * leaving mutex unlocked before we wait on semaphore.
-     */
-    qemu_mutex_unlock(mutex);
-    WaitForSingleObject(cond->sema, INFINITE);
-
-    /* Now waiters must rendez-vous with the signaling thread and
-     * let it continue.  For cond_broadcast this has heavy contention
-     * and triggers thundering herd.  So goes life.
-     *
-     * Decrease waiters count.  The mutex is not taken, so we have
-     * to do this atomically.
-     *
-     * All waiters contend for the mutex at the end of this function
-     * until the signaling thread relinquishes it.  To ensure
-     * each waiter consumes exactly one slice of the semaphore,
-     * the signaling thread stops until it is told by the last
-     * waiter that it can go on.
-     */
-    if (InterlockedDecrement(&cond->waiters) == cond->target) {
-        SetEvent(cond->continue_event);
-    }
-
-    qemu_mutex_lock(mutex);
+    SleepConditionVariableSRW(&cond->var, &mutex->lock, INFINITE, 0);
 }
 
 void qemu_sem_init(QemuSemaphore *sem, int init)
diff --git a/util/throttle.c b/util/throttle.c
index 3817d9b904..3570ed25fc 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -380,6 +380,14 @@ static void throttle_fix_bucket(LeakyBucket *bkt)
     }
 }
 
+/* undo internal bucket parameter changes (see throttle_fix_bucket()) */
+static void throttle_unfix_bucket(LeakyBucket *bkt)
+{
+    if (bkt->max < bkt->avg) {
+        bkt->max = 0;
+    }
+}
+
 /* take care of canceling a timer */
 static void throttle_cancel_timer(QEMUTimer *timer)
 {
@@ -420,7 +428,13 @@ void throttle_config(ThrottleState *ts,
  */
 void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
 {
+    int i;
+
     *cfg = ts->cfg;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_unfix_bucket(&cfg->buckets[i]);
+    }
 }
 
 
diff --git a/util/trace-events b/util/trace-events
index ac27d94a97..b44ef4f895 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -22,7 +22,7 @@ buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %
 buffer_free(const char *buf, size_t len) "%s: capacity %zd"
 
 # util/qemu-coroutine.c
-qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p"
+qemu_aio_coroutine_enter(void *ctx, void *from, void *to, void *opaque) "ctx %p from %p to %p opaque %p"
 qemu_coroutine_yield(void *from, void *to) "from %p to %p"
 qemu_coroutine_terminate(void *co) "self %p"