498 files changed, 21179 insertions, 6589 deletions
diff --git a/.shippable.yml b/.shippable.yml new file mode 100644 index 0000000000..653bd750fe --- /dev/null +++ b/.shippable.yml @@ -0,0 +1,21 @@ +language: c +env: + matrix: + - IMAGE=debian-armhf-cross + TARGET_LIST=arm-softmmu,arm-linux-user + - IMAGE=debian-arm64-cross + TARGET_LIST=aarch64-softmmu,aarch64-linux-user + - IMAGE=debian-s390x-cross + TARGET_LIST=s390x-softmmu,s390x-linux-user +build: + pre_ci: + - make docker-image-${IMAGE} + pre_ci_boot: + image_name: qemu + image_tag: ${IMAGE} + pull: false + options: "-e HOME=/root" + ci: + - unset CC + - ./configure ${QEMU_CONFIGURE_OPTS} --target-list=${TARGET_LIST} + - make -j2 diff --git a/CODING_STYLE b/CODING_STYLE index f53180bf3f..2fa0c0b65b 100644 --- a/CODING_STYLE +++ b/CODING_STYLE @@ -116,3 +116,10 @@ if (a == 1) { Rationale: Yoda conditions (as in 'if (1 == a)') are awkward to read. Besides, good compilers already warn users when '==' is mis-typed as '=', even when the constant is on the right. + +7. Comment style + +We use traditional C-style /* */ comments and avoid // comments. + +Rationale: The // form is valid in C99, so this is purely a matter of +consistency of style. The checkpatch script will warn you about this. diff --git a/MAINTAINERS b/MAINTAINERS index 4714df883b..b9a2171e9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -908,6 +908,8 @@ F: hw/acpi/* F: hw/smbios/* F: hw/i386/acpi-build.[hc] F: hw/arm/virt-acpi-build.c +F: tests/bios-tables-test.c +F: tests/acpi-utils.[hc] ppc4xx M: Alexander Graf <agraf@suse.de> @@ -1122,6 +1124,15 @@ F: hw/nvram/chrp_nvram.c F: include/hw/nvram/chrp_nvram.h F: tests/prom-env-test.c +VM Generation ID +M: Ben Warren <ben@skyportsystems.com> +S: Maintained +F: hw/acpi/vmgenid.c +F: include/hw/acpi/vmgenid.h +F: docs/specs/vmgenid.txt +F: tests/vmgenid-test.c +F: stubs/vmgenid.c + Subsystems ---------- Audio @@ -1800,9 +1811,14 @@ F: docs/block-replication.txt Build and test automation ------------------------- M: Alex Bennée <alex.bennee@linaro.org> +M: Fam Zheng <famz@redhat.com> L: qemu-devel@nongnu.org -S: Supported +S: Maintained F: .travis.yml +F: .shippable.yml +F: tests/docker/ +W: https://travis-ci.org/qemu/qemu +W: http://patchew.org/QEMU/ Documentation ------------- @@ -1811,9 +1827,3 @@ M: Daniel P. 
Berrange <berrange@redhat.com> S: Odd Fixes F: docs/build-system.txt -Docker testing --------------- -Docker based testing framework and cases -M: Fam Zheng <famz@redhat.com> -S: Maintained -F: tests/docker/ diff --git a/audio/audio.c b/audio/audio.c index c845a44f0a..c8898d8422 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -28,6 +28,7 @@ #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "qemu/cutils.h" +#include "sysemu/replay.h" #define AUDIO_CAP "audio" #include "audio_int.h" @@ -1112,7 +1113,7 @@ static int audio_is_timer_needed (void) static void audio_reset_timer (AudioState *s) { if (audio_is_timer_needed ()) { - timer_mod (s->ts, + timer_mod_anticipate_ns(s->ts, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + conf.period.ticks); } else { @@ -1387,6 +1388,7 @@ static void audio_run_out (AudioState *s) prev_rpos = hw->rpos; played = hw->pcm_ops->run_out (hw, live); + replay_audio_out(&played); if (audio_bug (AUDIO_FUNC, hw->rpos >= hw->samples)) { dolog ("hw->rpos=%d hw->samples=%d played=%d\n", hw->rpos, hw->samples, played); @@ -1450,9 +1452,12 @@ static void audio_run_in (AudioState *s) while ((hw = audio_pcm_hw_find_any_enabled_in (hw))) { SWVoiceIn *sw; - int captured, min; + int captured = 0, min; - captured = hw->pcm_ops->run_in (hw); + if (replay_mode != REPLAY_MODE_PLAY) { + captured = hw->pcm_ops->run_in(hw); + } + replay_audio_in(&captured, hw->conv_buf, &hw->wpos, hw->samples); min = audio_pcm_hw_find_min_in (hw); hw->total_samples_captured += captured - min; diff --git a/audio/audio.h b/audio/audio.h index c3c51988f5..f4339a185e 100644 --- a/audio/audio.h +++ b/audio/audio.h @@ -166,4 +166,9 @@ int wav_start_capture (CaptureState *s, const char *path, int freq, bool audio_is_cleaning_up(void); void audio_cleanup(void); +void audio_sample_to_uint64(void *samples, int pos, + uint64_t *left, uint64_t *right); +void audio_sample_from_uint64(void *samples, int pos, + uint64_t left, uint64_t right); + #endif /* QEMU_AUDIO_H */ diff --git a/audio/mixeng.c b/audio/mixeng.c index 66c0328d42..0bf9b5360f 100644 --- a/audio/mixeng.c +++ b/audio/mixeng.c @@ -25,6 +25,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/bswap.h" +#include "qemu/error-report.h" #include "audio.h" #define AUDIO_CAP "mixeng" @@ -267,6 +268,37 @@ f_sample *mixeng_clip[2][2][2][3] = { } }; + +void audio_sample_to_uint64(void *samples, int pos, + uint64_t *left, uint64_t *right) +{ + struct st_sample *sample = samples; + sample += pos; +#ifdef FLOAT_MIXENG + error_report( + "Coreaudio and floating point samples are not supported by replay yet"); + abort(); +#else + *left = sample->l; + *right = sample->r; +#endif +} + +void audio_sample_from_uint64(void *samples, int pos, + uint64_t left, uint64_t right) +{ + struct st_sample *sample = samples; + sample += pos; +#ifdef FLOAT_MIXENG + error_report( + "Coreaudio and floating point samples are not supported by replay yet"); + abort(); +#else + sample->l = left; + sample->r = right; +#endif +} + /* * August 21, 1998 * Copyright 1998 Fabrice Bellard. 
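The audio/mixeng.c hunk above adds a pair of helpers that convert between the mixing engine's internal stereo samples and plain uint64_t pairs, so captured audio can be written to and read back from a record/replay log in a host-independent form (floating-point mixeng builds are rejected for now). The standalone sketch below is not part of the diff; `struct stereo_sample`, the helper names and the round-trip in main() are stand-ins for QEMU's internal `struct st_sample` and replay stream, shown only to illustrate the idea.

```c
#include <inttypes.h>
#include <stdio.h>

/* Stand-in for QEMU's internal struct st_sample (integer mixeng build). */
struct stereo_sample {
    int64_t l;
    int64_t r;
};

/* Serialize one frame as two fixed-width integers, in the spirit of the
 * new audio_sample_to_uint64() helper used when recording. */
static void sample_to_uint64(const struct stereo_sample *s,
                             uint64_t *left, uint64_t *right)
{
    *left = (uint64_t)s->l;
    *right = (uint64_t)s->r;
}

/* Restore a frame from the logged values (cf. audio_sample_from_uint64()),
 * used when replaying. */
static void sample_from_uint64(struct stereo_sample *s,
                               uint64_t left, uint64_t right)
{
    s->l = (int64_t)left;
    s->r = (int64_t)right;
}

int main(void)
{
    struct stereo_sample in = { .l = -1234, .r = 5678 }, out;
    uint64_t log_l, log_r;

    sample_to_uint64(&in, &log_l, &log_r);   /* record step */
    sample_from_uint64(&out, log_l, log_r);  /* replay step */

    /* The round trip is lossless for integer samples. */
    printf("l=%" PRId64 " r=%" PRId64 "\n", out.l, out.r);
    return 0;
}
```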
diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c index db69fe1416..e8d91d22af 100644 --- a/audio/sdlaudio.c +++ b/audio/sdlaudio.c @@ -38,10 +38,14 @@ #define AUDIO_CAP "sdl" #include "audio_int.h" +#define USE_SEMAPHORE (SDL_MAJOR_VERSION < 2) + typedef struct SDLVoiceOut { HWVoiceOut hw; int live; +#if USE_SEMAPHORE int rpos; +#endif int decr; } SDLVoiceOut; @@ -53,8 +57,10 @@ static struct { static struct SDLAudioState { int exit; +#if USE_SEMAPHORE SDL_mutex *mutex; SDL_sem *sem; +#endif int initialized; bool driver_created; } glob_sdl; @@ -73,31 +79,45 @@ static void GCC_FMT_ATTR (1, 2) sdl_logerr (const char *fmt, ...) static int sdl_lock (SDLAudioState *s, const char *forfn) { +#if USE_SEMAPHORE if (SDL_LockMutex (s->mutex)) { sdl_logerr ("SDL_LockMutex for %s failed\n", forfn); return -1; } +#else + SDL_LockAudio(); +#endif + return 0; } static int sdl_unlock (SDLAudioState *s, const char *forfn) { +#if USE_SEMAPHORE if (SDL_UnlockMutex (s->mutex)) { sdl_logerr ("SDL_UnlockMutex for %s failed\n", forfn); return -1; } +#else + SDL_UnlockAudio(); +#endif + return 0; } static int sdl_post (SDLAudioState *s, const char *forfn) { +#if USE_SEMAPHORE if (SDL_SemPost (s->sem)) { sdl_logerr ("SDL_SemPost for %s failed\n", forfn); return -1; } +#endif + return 0; } +#if USE_SEMAPHORE static int sdl_wait (SDLAudioState *s, const char *forfn) { if (SDL_SemWait (s->sem)) { @@ -106,6 +126,7 @@ static int sdl_wait (SDLAudioState *s, const char *forfn) } return 0; } +#endif static int sdl_unlock_and_post (SDLAudioState *s, const char *forfn) { @@ -246,6 +267,7 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len) int to_mix, decr; /* dolog ("in callback samples=%d\n", samples); */ +#if USE_SEMAPHORE sdl_wait (s, "sdl_callback"); if (s->exit) { return; @@ -264,6 +286,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len) if (!sdl->live) { goto again; } +#else + if (s->exit || !sdl->live) { + break; + } +#endif /* dolog ("in callback live=%d\n", live); */ to_mix = audio_MIN (samples, sdl->live); @@ -274,7 +301,11 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len) /* dolog ("in callback to_mix %d, chunk %d\n", to_mix, chunk); */ hw->clip (buf, src, chunk); +#if USE_SEMAPHORE sdl->rpos = (sdl->rpos + chunk) % hw->samples; +#else + hw->rpos = (hw->rpos + chunk) % hw->samples; +#endif to_mix -= chunk; buf += chunk << hw->info.shift; } @@ -282,12 +313,21 @@ static void sdl_callback (void *opaque, Uint8 *buf, int len) sdl->live -= decr; sdl->decr += decr; +#if USE_SEMAPHORE again: if (sdl_unlock (s, "sdl_callback")) { return; } +#endif } /* dolog ("done len=%d\n", len); */ + +#if (SDL_MAJOR_VERSION >= 2) + /* SDL2 does not clear the remaining buffer for us, so do it on our own */ + if (samples) { + memset(buf, 0, samples << hw->info.shift); + } +#endif } static int sdl_write_out (SWVoiceOut *sw, void *buf, int len) @@ -315,8 +355,12 @@ static int sdl_run_out (HWVoiceOut *hw, int live) decr = audio_MIN (sdl->decr, live); sdl->decr -= decr; +#if USE_SEMAPHORE sdl->live = live - decr; hw->rpos = sdl->rpos; +#else + sdl->live = live; +#endif if (sdl->live > 0) { sdl_unlock_and_post (s, "sdl_run_out"); @@ -405,6 +449,7 @@ static void *sdl_audio_init (void) return NULL; } +#if USE_SEMAPHORE s->mutex = SDL_CreateMutex (); if (!s->mutex) { sdl_logerr ("Failed to create SDL mutex\n"); @@ -419,6 +464,7 @@ static void *sdl_audio_init (void) SDL_QuitSubSystem (SDL_INIT_AUDIO); return NULL; } +#endif s->driver_created = true; return s; @@ -428,8 +474,10 @@ static void sdl_audio_fini 
(void *opaque) { SDLAudioState *s = opaque; sdl_close (s); +#if USE_SEMAPHORE SDL_DestroySemaphore (s->sem); SDL_DestroyMutex (s->mutex); +#endif SDL_QuitSubSystem (SDL_INIT_AUDIO); s->driver_created = false; } @@ -588,21 +588,20 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, return drv; } -static int find_image_format(BdrvChild *file, const char *filename, +static int find_image_format(BlockBackend *file, const char *filename, BlockDriver **pdrv, Error **errp) { - BlockDriverState *bs = file->bs; BlockDriver *drv; uint8_t buf[BLOCK_PROBE_BUF_SIZE]; int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ - if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { + if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) { *pdrv = &bdrv_raw; return ret; } - ret = bdrv_pread(file, 0, buf, sizeof(buf)); + ret = blk_pread(file, 0, buf, sizeof(buf)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read image for determining its " "format"); @@ -708,6 +707,12 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough) return 0; } +static char *bdrv_child_get_parent_desc(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + return g_strdup(bdrv_get_device_or_node_name(parent)); +} + static void bdrv_child_cb_drained_begin(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -775,6 +780,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_file = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -795,11 +801,63 @@ static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, } const BdrvChildRole child_format = { + .get_parent_desc = bdrv_child_get_parent_desc, .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, }; +static void bdrv_backing_attach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + BlockDriverState *backing_hd = c->bs; + + assert(!parent->backing_blocker); + error_setg(&parent->backing_blocker, + "node is used as backing hd of '%s'", + bdrv_get_device_or_node_name(parent)); + + parent->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(parent->backing_file, sizeof(parent->backing_file), + backing_hd->filename); + pstrcpy(parent->backing_format, sizeof(parent->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(backing_hd, parent->backing_blocker); + /* Otherwise we won't be able to commit or stream */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + parent->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. 
+ */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + parent->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + parent->backing_blocker); +} + +static void bdrv_backing_detach(BdrvChild *c) +{ + BlockDriverState *parent = c->opaque; + + assert(parent->backing_blocker); + bdrv_op_unblock_all(c->bs, parent->backing_blocker); + error_free(parent->backing_blocker); + parent->backing_blocker = NULL; +} + /* * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS @@ -824,7 +882,10 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, *child_flags = flags; } -static const BdrvChildRole child_backing = { +const BdrvChildRole child_backing = { + .get_parent_desc = bdrv_child_get_parent_desc, + .attach = bdrv_backing_attach, + .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, @@ -926,6 +987,95 @@ out: g_free(gen_node_name); } +static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, + const char *node_name, QDict *options, + int open_flags, Error **errp) +{ + Error *local_err = NULL; + int ret; + + bdrv_assign_node_name(bs, node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + bs->drv = drv; + bs->read_only = !(bs->open_flags & BDRV_O_RDWR); + bs->opaque = g_malloc0(drv->instance_size); + + if (drv->bdrv_file_open) { + assert(!drv->bdrv_needs_filename || bs->filename[0]); + ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); + } else if (drv->bdrv_open) { + ret = drv->bdrv_open(bs, options, open_flags, &local_err); + } else { + ret = 0; + } + + if (ret < 0) { + if (local_err) { + error_propagate(errp, local_err); + } else if (bs->filename[0]) { + error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); + } else { + error_setg_errno(errp, -ret, "Could not open image"); + } + goto free_and_fail; + } + + ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); + goto free_and_fail; + } + + bdrv_refresh_limits(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto free_and_fail; + } + + assert(bdrv_opt_mem_align(bs) != 0); + assert(bdrv_min_mem_align(bs) != 0); + assert(is_power_of_2(bs->bl.request_alignment)); + + return 0; + +free_and_fail: + /* FIXME Close bs first if already opened*/ + g_free(bs->opaque); + bs->opaque = NULL; + bs->drv = NULL; + return ret; +} + +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp) +{ + BlockDriverState *bs; + int ret; + + bs = bdrv_new(); + bs->open_flags = flags; + bs->explicit_options = qdict_new(); + bs->options = qdict_new(); + bs->opaque = NULL; + + update_options_from_flags(bs->options, flags); + + ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp); + if (ret < 0) { + QDECREF(bs->explicit_options); + QDECREF(bs->options); + bdrv_unref(bs); + return NULL; + } + + return bs; +} + QemuOptsList bdrv_runtime_opts = { .name = "bdrv_common", .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), @@ -974,7 +1124,7 @@ QemuOptsList bdrv_runtime_opts = { * * Removes all processed options from *options. 
*/ -static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, +static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file, QDict *options, Error **errp) { int ret, open_flags; @@ -1005,7 +1155,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, assert(drv != NULL); if (file != NULL) { - filename = file->bs->filename; + filename = blk_bs(file)->filename; } else { filename = qdict_get_try_str(options, "filename"); } @@ -1020,14 +1170,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, trace_bdrv_open_common(bs, filename ?: "", bs->open_flags, drv->format_name); - node_name = qemu_opt_get(opts, "node-name"); - bdrv_assign_node_name(bs, node_name, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail_opts; - } - bs->read_only = !(bs->open_flags & BDRV_O_RDWR); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { @@ -1093,62 +1235,19 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename); - bs->drv = drv; - bs->opaque = g_malloc0(drv->instance_size); - /* Open the image, either directly or using a protocol */ open_flags = bdrv_open_flags(bs, bs->open_flags); - if (drv->bdrv_file_open) { - assert(file == NULL); - assert(!drv->bdrv_needs_filename || filename != NULL); - ret = drv->bdrv_file_open(bs, options, open_flags, &local_err); - } else { - if (file == NULL) { - error_setg(errp, "Can't use '%s' as a block driver for the " - "protocol level", drv->format_name); - ret = -EINVAL; - goto free_and_fail; - } - bs->file = file; - ret = drv->bdrv_open(bs, options, open_flags, &local_err); - } - - if (ret < 0) { - if (local_err) { - error_propagate(errp, local_err); - } else if (bs->filename[0]) { - error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename); - } else { - error_setg_errno(errp, -ret, "Could not open image"); - } - goto free_and_fail; - } + node_name = qemu_opt_get(opts, "node-name"); - ret = refresh_total_sectors(bs, bs->total_sectors); + assert(!drv->bdrv_file_open || file == NULL); + ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp); if (ret < 0) { - error_setg_errno(errp, -ret, "Could not refresh total sector count"); - goto free_and_fail; - } - - bdrv_refresh_limits(bs, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto free_and_fail; + goto fail_opts; } - assert(bdrv_opt_mem_align(bs) != 0); - assert(bdrv_min_mem_align(bs) != 0); - assert(is_power_of_2(bs->bl.request_alignment)); - qemu_opts_del(opts); return 0; -free_and_fail: - bs->file = NULL; - g_free(bs->opaque); - bs->opaque = NULL; - bs->drv = NULL; fail_opts: qemu_opts_del(opts); return ret; @@ -1169,13 +1268,13 @@ static QDict *parse_json_filename(const char *filename, Error **errp) return NULL; } - if (qobject_type(options_obj) != QTYPE_QDICT) { + options = qobject_to_qdict(options_obj); + if (!options) { qobject_decref(options_obj); error_setg(errp, "Invalid JSON object given"); return NULL; } - options = qobject_to_qdict(options_obj); qdict_flatten(options); return options; @@ -1289,15 +1388,352 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) +/* + * Check whether permissions on this node can be changed in a way that + * @cumulative_perms and @cumulative_shared_perms are the new cumulative + * permissions of all its parents. 
This involves checking whether all necessary + * permission changes to child nodes can be performed. + * + * A call to this function must always be followed by a call to bdrv_set_perm() + * or bdrv_abort_perm_update(). + */ +static int bdrv_check_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms, Error **errp) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + int ret; + + /* Write permissions never work with read-only images */ + if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) && + bdrv_is_read_only(bs)) + { + error_setg(errp, "Block node is read-only"); + return -EPERM; + } + + /* Check this node */ + if (!drv) { + return 0; + } + + if (drv->bdrv_check_perm) { + return drv->bdrv_check_perm(bs, cumulative_perms, + cumulative_shared_perms, errp); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return 0; + } + + /* Check all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + ret = bdrv_child_check_perm(c, cur_perm, cur_shared, errp); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +/* + * Notifies drivers that after a previous bdrv_check_perm() call, the + * permission update is not performed and any preparations made for it (e.g. + * taken file locks) need to be undone. + * + * This function recursively notifies all child nodes. + */ +static void bdrv_abort_perm_update(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + if (drv->bdrv_abort_perm_update) { + drv->bdrv_abort_perm_update(bs); + } + + QLIST_FOREACH(c, &bs->children, next) { + bdrv_child_abort_perm_update(c); + } +} + +static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, + uint64_t cumulative_shared_perms) +{ + BlockDriver *drv = bs->drv; + BdrvChild *c; + + if (!drv) { + return; + } + + /* Update this node */ + if (drv->bdrv_set_perm) { + drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms); + } + + /* Drivers that never have children can omit .bdrv_child_perm() */ + if (!drv->bdrv_child_perm) { + assert(QLIST_EMPTY(&bs->children)); + return; + } + + /* Update all children */ + QLIST_FOREACH(c, &bs->children, next) { + uint64_t cur_perm, cur_shared; + drv->bdrv_child_perm(bs, c, c->role, + cumulative_perms, cumulative_shared_perms, + &cur_perm, &cur_shared); + bdrv_child_set_perm(c, cur_perm, cur_shared); + } +} + +static void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm, + uint64_t *shared_perm) +{ + BdrvChild *c; + uint64_t cumulative_perms = 0; + uint64_t cumulative_shared_perms = BLK_PERM_ALL; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + *perm = cumulative_perms; + *shared_perm = cumulative_shared_perms; +} + +static char *bdrv_child_user_desc(BdrvChild *c) +{ + if (c->role->get_parent_desc) { + return c->role->get_parent_desc(c); + } + + return g_strdup("another user"); +} + +static char *bdrv_perm_names(uint64_t perm) +{ + struct perm_name { + uint64_t perm; + const char *name; + } permissions[] = { + { BLK_PERM_CONSISTENT_READ, "consistent read" }, + { BLK_PERM_WRITE, "write" }, + { BLK_PERM_WRITE_UNCHANGED, "write unchanged" }, + { BLK_PERM_RESIZE, "resize" }, + { BLK_PERM_GRAPH_MOD, "change children" }, + { 0, NULL } 
+ }; + + char *result = g_strdup(""); + struct perm_name *p; + + for (p = permissions; p->name; p++) { + if (perm & p->perm) { + char *old = result; + result = g_strdup_printf("%s%s%s", old, *old ? ", " : "", p->name); + g_free(old); + } + } + + return result; +} + +/* + * Checks whether a new reference to @bs can be added if the new user requires + * @new_used_perm/@new_shared_perm as its permissions. If @ignore_child is set, + * this old reference is ignored in the calculations; this allows checking + * permission updates for an existing reference. + * + * Needs to be followed by a call to either bdrv_set_perm() or + * bdrv_abort_perm_update(). */ +static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm, + uint64_t new_shared_perm, + BdrvChild *ignore_child, Error **errp) +{ + BdrvChild *c; + uint64_t cumulative_perms = new_used_perm; + uint64_t cumulative_shared_perms = new_shared_perm; + + /* There is no reason why anyone couldn't tolerate write_unchanged */ + assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED); + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c == ignore_child) { + continue; + } + + if ((new_used_perm & c->shared_perm) != new_used_perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which does not " + "allow '%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + if ((c->perm & new_shared_perm) != c->perm) { + char *user = bdrv_child_user_desc(c); + char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm); + error_setg(errp, "Conflicts with use by %s as '%s', which uses " + "'%s' on %s", + user, c->name, perm_names, bdrv_get_node_name(c->bs)); + g_free(user); + g_free(perm_names); + return -EPERM; + } + + cumulative_perms |= c->perm; + cumulative_shared_perms &= c->shared_perm; + } + + return bdrv_check_perm(bs, cumulative_perms, cumulative_shared_perms, errp); +} + +/* Needs to be followed by a call to either bdrv_child_set_perm() or + * bdrv_child_abort_perm_update(). 
*/ +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + return bdrv_check_update_perm(c->bs, perm, shared, c, errp); +} + +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared) +{ + uint64_t cumulative_perms, cumulative_shared_perms; + + c->perm = perm; + c->shared_perm = shared; + + bdrv_get_cumulative_perm(c->bs, &cumulative_perms, + &cumulative_shared_perms); + bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms); +} + +void bdrv_child_abort_perm_update(BdrvChild *c) +{ + bdrv_abort_perm_update(c->bs); +} + +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp) +{ + int ret; + + ret = bdrv_child_check_perm(c, perm, shared, errp); + if (ret < 0) { + bdrv_child_abort_perm_update(c); + return ret; + } + + bdrv_child_set_perm(c, perm, shared); + + return 0; +} + +#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_WRITE_UNCHANGED \ + | BLK_PERM_RESIZE) +#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) + +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + if (c == NULL) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + *nperm = (perm & DEFAULT_PERM_PASSTHROUGH) | + (c->perm & DEFAULT_PERM_UNCHANGED); + *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | + (c->shared_perm & DEFAULT_PERM_UNCHANGED); +} + +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + bool backing = (role == &child_backing); + assert(role == &child_backing || role == &child_file); + + if (!backing) { + /* Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters */ + bdrv_filter_default_perms(bs, c, role, perm, shared, &perm, &shared); + + /* Format drivers may touch metadata even if the guest doesn't write */ + if (!bdrv_is_read_only(bs)) { + perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; + } + + /* bs->file always needs to be consistent because of the metadata. We + * can never allow other users to resize or write to it. */ + perm |= BLK_PERM_CONSISTENT_READ; + shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + } else { + /* We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. */ + perm &= BLK_PERM_CONSISTENT_READ; + + /* If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. */ + /* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */ + if (shared & BLK_PERM_WRITE) { + shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; + } else { + shared = 0; + } + + shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + } + + *nperm = perm; + *nshared = shared; +} + +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, + bool check_new_perm) { BlockDriverState *old_bs = child->bs; + uint64_t perm, shared_perm; if (old_bs) { if (old_bs->quiesce_counter && child->role->drained_end) { child->role->drained_end(child); } + if (child->role->detach) { + child->role->detach(child); + } QLIST_REMOVE(child, next_parent); + + /* Update permissions for old node. 
This is guaranteed to succeed + * because we're just taking a parent away, so we're loosening + * restrictions. */ + bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); + bdrv_check_perm(old_bs, perm, shared_perm, &error_abort); + bdrv_set_perm(old_bs, perm, shared_perm); } child->bs = new_bs; @@ -1307,23 +1743,46 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) if (new_bs->quiesce_counter && child->role->drained_begin) { child->role->drained_begin(child); } + + bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); + if (check_new_perm) { + bdrv_check_perm(new_bs, perm, shared_perm, &error_abort); + } + bdrv_set_perm(new_bs, perm, shared_perm); + + if (child->role->attach) { + child->role->attach(child); + } } } BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque) + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp) { - BdrvChild *child = g_new(BdrvChild, 1); + BdrvChild *child; + int ret; + + ret = bdrv_check_update_perm(child_bs, perm, shared_perm, NULL, errp); + if (ret < 0) { + bdrv_abort_perm_update(child_bs); + return NULL; + } + + child = g_new(BdrvChild, 1); *child = (BdrvChild) { - .bs = NULL, - .name = g_strdup(child_name), - .role = child_role, - .opaque = opaque, + .bs = NULL, + .name = g_strdup(child_name), + .role = child_role, + .perm = perm, + .shared_perm = shared_perm, + .opaque = opaque, }; - bdrv_replace_child(child, child_bs); + /* This performs the matching bdrv_set_perm() for the above check. */ + bdrv_replace_child(child, child_bs, false); return child; } @@ -1331,10 +1790,24 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role) + const BdrvChildRole *child_role, + Error **errp) { - BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role, - parent_bs); + BdrvChild *child; + uint64_t perm, shared_perm; + + bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); + + assert(parent_bs->drv); + parent_bs->drv->bdrv_child_perm(parent_bs, NULL, child_role, + perm, shared_perm, &perm, &shared_perm); + + child = bdrv_root_attach_child(child_bs, child_name, child_role, + perm, shared_perm, parent_bs, errp); + if (child == NULL) { + return NULL; + } + QLIST_INSERT_HEAD(&parent_bs->children, child, next); return child; } @@ -1346,7 +1819,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL); + bdrv_replace_child(child, NULL, false); g_free(child->name); g_free(child); @@ -1368,7 +1841,18 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) } if (child->bs->inherits_from == parent) { - child->bs->inherits_from = NULL; + BdrvChild *c; + + /* Remove inherits_from only when the last reference between parent and + * child->bs goes away. */ + QLIST_FOREACH(c, &parent->children, next) { + if (c != child && c->bs == child->bs) { + break; + } + } + if (c == NULL) { + child->bs->inherits_from = NULL; + } } bdrv_root_unref_child(child); @@ -1399,57 +1883,28 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs) * Sets the backing file link of a BDS. A new reference is created; callers * which don't need their own reference any more must call bdrv_unref(). 
*/ -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp) { if (backing_hd) { bdrv_ref(backing_hd); } if (bs->backing) { - assert(bs->backing_blocker); - bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker); bdrv_unref_child(bs, bs->backing); - } else if (backing_hd) { - error_setg(&bs->backing_blocker, - "node is used as backing hd of '%s'", - bdrv_get_device_or_node_name(bs)); } if (!backing_hd) { - error_free(bs->backing_blocker); - bs->backing_blocker = NULL; bs->backing = NULL; goto out; } - bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing); - bs->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); - pstrcpy(bs->backing_format, sizeof(bs->backing_format), - backing_hd->drv ? backing_hd->drv->format_name : ""); - bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit or stream */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, - bs->backing_blocker); - /* - * We do backup in 3 ways: - * 1. drive backup - * The target bs is new opened, and the source is top BDS - * 2. blockdev backup - * Both the source and the target are top BDSes. - * 3. internal backup(used for block replication) - * Both the source and the target are backing file - * - * In case 1 and 2, neither the source nor the target is the backing file. - * In case 3, we will block the top BDS, so there is only one block job - * for the top BDS and its backing chain. - */ - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, - bs->backing_blocker); - bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, - bs->backing_blocker); + bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing, + errp); + if (!bs->backing) { + bdrv_unref(backing_hd); + } + out: bdrv_refresh_limits(bs, NULL); } @@ -1532,8 +1987,12 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, /* Hook up the backing file link; drop our reference, bs owns the * backing_hd reference now */ - bdrv_set_backing_hd(bs, backing_hd); + bdrv_set_backing_hd(bs, backing_hd, &local_err); bdrv_unref(backing_hd); + if (local_err) { + ret = -EINVAL; + goto free_exit; + } qdict_del(parent_options, bdref_key); @@ -1543,28 +2002,12 @@ free_exit: return ret; } -/* - * Opens a disk image whose options are given as BlockdevRef in another block - * device's options. - * - * If allow_none is true, no image will be opened if filename is false and no - * BlockdevRef is given. NULL will be returned, but errp remains unset. - * - * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. - * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict - * itself, all options starting with "${bdref_key}." are considered part of the - * BlockdevRef. - * - * The BlockdevRef will be removed from the options QDict. 
- */ -BdrvChild *bdrv_open_child(const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState* parent, - const BdrvChildRole *child_role, - bool allow_none, Error **errp) +static BlockDriverState * +bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + BlockDriverState *parent, const BdrvChildRole *child_role, + bool allow_none, Error **errp) { - BdrvChild *c = NULL; - BlockDriverState *bs; + BlockDriverState *bs = NULL; QDict *image_options; char *bdref_key_dot; const char *reference; @@ -1591,10 +2034,46 @@ BdrvChild *bdrv_open_child(const char *filename, goto done; } - c = bdrv_attach_child(parent, bs, bdref_key, child_role); - done: qdict_del(options, bdref_key); + return bs; +} + +/* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. NULL will be returned, but errp remains unset. + * + * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. + * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * + * The BlockdevRef will be removed from the options QDict. + */ +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState *parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp) +{ + BdrvChild *c; + BlockDriverState *bs; + + bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_role, + allow_none, errp); + if (bs == NULL) { + return NULL; + } + + c = bdrv_attach_child(parent, bs, bdref_key, child_role, errp); + if (!c) { + bdrv_unref(bs); + return NULL; + } + return c; } @@ -1608,6 +2087,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, int64_t total_size; QemuOpts *opts = NULL; BlockDriverState *bs_snapshot; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1657,7 +2137,12 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, * call bdrv_unref() on it), so in order to be able to return one, we have * to increase bs_snapshot's refcount here */ bdrv_ref(bs_snapshot); - bdrv_append(bs_snapshot, bs); + bdrv_append(bs_snapshot, bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto out; + } g_free(tmp_filename); return bs_snapshot; @@ -1691,7 +2176,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, Error **errp) { int ret; - BdrvChild *file = NULL; + BlockBackend *file = NULL; BlockDriverState *bs; BlockDriver *drv = NULL; const char *drvname; @@ -1789,13 +2274,28 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, qdict_del(options, "backing"); } - /* Open image file without format layer */ + /* Open image file without format layer. This BlockBackend is only used for + * probing, the block drivers will do their own bdrv_open_child() for the + * same BDS, which is why we put the node name back into options. 
*/ if ((flags & BDRV_O_PROTOCOL) == 0) { - file = bdrv_open_child(filename, options, "file", bs, - &child_file, true, &local_err); + BlockDriverState *file_bs; + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_file, true, &local_err); if (local_err) { goto fail; } + if (file_bs != NULL) { + file = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); + bdrv_unref(file_bs); + if (local_err) { + goto fail; + } + + qdict_put(options, "file", + qstring_from_str(bdrv_get_node_name(file_bs))); + } } /* Image format probing */ @@ -1835,8 +2335,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } - if (file && (bs->file != file)) { - bdrv_unref_child(bs, file); + if (file) { + blk_unref(file); file = NULL; } @@ -1898,8 +2398,9 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, return bs; fail: - if (file != NULL) { - bdrv_unref_child(bs, file); + blk_unref(file); + if (bs->file != NULL) { + bdrv_unref_child(bs, bs->file); } QDECREF(snapshot_options); QDECREF(bs->explicit_options); @@ -2331,7 +2832,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv->bdrv_close(bs); bs->drv = NULL; - bdrv_set_backing_hd(bs, NULL); + bdrv_set_backing_hd(bs, NULL, &error_abort); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); @@ -2391,10 +2892,13 @@ static void change_parent_backing_link(BlockDriverState *from, BdrvChild *c, *next, *to_c; QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + if (c->role->stay_at_node) { + continue; + } if (c->role == &child_backing) { - /* @from is generally not allowed to be a backing file, except for - * when @to is the overlay. In that case, @from may not be replaced - * by @to as @to's backing node. */ + /* If @from is a backing file of @to, ignore the child to avoid + * creating a loop. We only want to change the pointer of other + * parents. */ QLIST_FOREACH(to_c, &to->children, next) { if (to_c == c) { break; @@ -2405,9 +2909,10 @@ static void change_parent_backing_link(BlockDriverState *from, } } - assert(c->role != &child_backing); bdrv_ref(to); - bdrv_replace_child(c, to); + /* FIXME Are we sure that bdrv_replace_child() can't run into + * &error_abort because of permissions? */ + bdrv_replace_child(c, to, true); bdrv_unref(from); } } @@ -2428,19 +2933,25 @@ static void change_parent_backing_link(BlockDriverState *from, * parents of bs_top after bdrv_append() returns. If the caller needs to keep a * reference of its own, it must call bdrv_ref(). */ -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) { - assert(!bdrv_requests_pending(bs_top)); - assert(!bdrv_requests_pending(bs_new)); + Error *local_err = NULL; - bdrv_ref(bs_top); + assert(!atomic_read(&bs_top->in_flight)); + assert(!atomic_read(&bs_new->in_flight)); + + bdrv_set_backing_hd(bs_new, bs_top, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } change_parent_backing_link(bs_top, bs_new); - bdrv_set_backing_hd(bs_new, bs_top); - bdrv_unref(bs_top); /* bs_new is now referenced by its new parents, we don't need the * additional reference any more. 
*/ +out: bdrv_unref(bs_new); } @@ -2584,6 +3095,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base, const char *backing_file_str) { BlockDriverState *new_top_bs = NULL; + Error *local_err = NULL; int ret = -EIO; if (!top->drv || !base->drv) { @@ -2616,7 +3128,13 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, if (ret) { goto exit; } - bdrv_set_backing_hd(new_top_bs, base); + + bdrv_set_backing_hd(new_top_bs, base, &local_err); + if (local_err) { + ret = -EPERM; + error_report_err(local_err); + goto exit; + } ret = 0; exit: @@ -2626,10 +3144,14 @@ exit: /** * Truncate file to 'offset' bytes (needed only for file protocols) */ -int bdrv_truncate(BlockDriverState *bs, int64_t offset) +int bdrv_truncate(BdrvChild *child, int64_t offset) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; int ret; + + assert(child->perm & BLK_PERM_RESIZE); + if (!drv) return -ENOMEDIUM; if (!drv->bdrv_truncate) diff --git a/block/backup.c b/block/backup.c index fe010e78e3..d1ab617c7e 100644 --- a/block/backup.c +++ b/block/backup.c @@ -618,14 +618,24 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, goto error; } - job = block_job_create(job_id, &backup_job_driver, bs, speed, - creation_flags, cb, opaque, errp); + /* job->common.len is fixed, so we can't allow resize */ + job = block_job_create(job_id, &backup_job_driver, bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD, + speed, creation_flags, cb, opaque, errp); if (!job) { goto error; } - job->target = blk_new(); - blk_insert_bs(job->target, target); + /* The target must match the source in size, so no resize here either */ + job->target = blk_new(BLK_PERM_WRITE, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(job->target, target, errp); + if (ret < 0) { + goto error; + } job->on_source_error = on_source_error; job->on_target_error = on_target_error; @@ -652,7 +662,9 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - block_job_add_bdrv(&job->common, target); + /* Required permissions are already taken with target's blk_new() */ + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); job->common.len = len; block_job_txn_add_job(txn, &job->common); diff --git a/block/blkdebug.c b/block/blkdebug.c index d8eee1b9b4..67e8024e36 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -663,7 +663,7 @@ static int64_t blkdebug_getlength(BlockDriverState *bs) static int blkdebug_truncate(BlockDriverState *bs, int64_t offset) { - return bdrv_truncate(bs->file->bs, offset); + return bdrv_truncate(bs->file, offset); } static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) @@ -734,6 +734,8 @@ static BlockDriver bdrv_blkdebug = { .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, .bdrv_reopen_prepare = blkdebug_reopen_prepare, + .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c index cfc8c5be02..e1102119fb 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -137,6 +137,7 @@ static BlockDriver bdrv_blkreplay = { .bdrv_file_open = blkreplay_open, .bdrv_close = 
blkreplay_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 43a940c2f5..9a1e21c6ad 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -320,6 +320,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, diff --git a/block/block-backend.c b/block/block-backend.c index 819f27213a..daa7908d01 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -59,6 +59,9 @@ struct BlockBackend { bool iostatus_enabled; BlockDeviceIoStatus iostatus; + uint64_t perm; + uint64_t shared_perm; + bool allow_write_beyond_eof; NotifierList remove_bs_notifiers, insert_bs_notifiers; @@ -77,6 +80,7 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); static BlockBackend *bdrv_first_blk(BlockDriverState *bs); +static char *blk_get_attached_dev_id(BlockBackend *blk); /* All BlockBackends */ static QTAILQ_HEAD(, BlockBackend) block_backends = @@ -99,6 +103,25 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); +static char *blk_root_get_parent_desc(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + char *dev_id; + + if (blk->name) { + return g_strdup(blk->name); + } + + dev_id = blk_get_attached_dev_id(blk); + if (*dev_id) { + return dev_id; + } else { + /* TODO Callback into the BB owner for something more detailed */ + g_free(dev_id); + return g_strdup("a block device"); + } +} + static const char *blk_root_get_name(BdrvChild *child) { return blk_name(child->opaque); @@ -110,6 +133,7 @@ static const BdrvChildRole child_root = { .change_media = blk_root_change_media, .resize = blk_root_resize, .get_name = blk_root_get_name, + .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, .drained_end = blk_root_drained_end, @@ -117,15 +141,23 @@ static const BdrvChildRole child_root = { /* * Create a new BlockBackend with a reference count of one. - * Store an error through @errp on failure, unless it's null. + * + * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions + * to request for a block driver node that is attached to this BlockBackend. + * @shared_perm is a bitmask which describes which permissions may be granted + * to other users of the attached node. + * Both sets of permissions can be changed later using blk_set_perm(). + * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(void) +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->perm = perm; + blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); qemu_co_queue_init(&blk->public.throttled_reqs[0]); @@ -155,15 +187,33 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, { BlockBackend *blk; BlockDriverState *bs; + uint64_t perm; + + /* blk_new_open() is mainly used in .bdrv_create implementations and the + * tools where sharing isn't a concern because the BDS stays private, so we + * just request permission according to the flags. 
+ * + * The exceptions are xen_disk and blockdev_init(); in these cases, the + * caller of blk_new_open() doesn't make use of the permissions, but they + * shouldn't hurt either. We can still share everything here because the + * guest devices will add their own blockers if they can't share. */ + perm = BLK_PERM_CONSISTENT_READ; + if (flags & BDRV_O_RDWR) { + perm |= BLK_PERM_WRITE; + } + if (flags & BDRV_O_RESIZE) { + perm |= BLK_PERM_RESIZE; + } - blk = blk_new(); + blk = blk_new(perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + perm, BLK_PERM_ALL, blk, &error_abort); return blk; } @@ -495,16 +545,49 @@ void blk_remove_bs(BlockBackend *blk) /* * Associates a new BlockDriverState with @blk. */ -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->perm, blk->shared_perm, blk, errp); + if (blk->root == NULL) { + return -EPERM; + } bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk); notifier_list_notify(&blk->insert_bs_notifiers, blk); if (blk->public.throttle_state) { throttle_timers_attach_aio_context( &blk->public.throttle_timers, bdrv_get_aio_context(bs)); } + + return 0; +} + +/* + * Sets the permission bitmasks that the user of the BlockBackend needs. + */ +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + int ret; + + if (blk->root) { + ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp); + if (ret < 0) { + return ret; + } + } + + blk->perm = perm; + blk->shared_perm = shared_perm; + + return 0; +} + +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) +{ + *perm = blk->perm; + *shared_perm = blk->shared_perm; } static int blk_do_attach_dev(BlockBackend *blk, void *dev) @@ -553,6 +636,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev_ops = NULL; blk->dev_opaque = NULL; blk->guest_block_size = 512; + blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort); blk_unref(blk); } @@ -620,19 +704,29 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, /* * Notify @blk's attached device model of media change. - * If @load is true, notify of media load. - * Else, notify of media eject. + * + * If @load is true, notify of media load. This action can fail, meaning that + * the medium cannot be loaded. @errp is set then. + * + * If @load is false, notify of media eject. This can never fail. + * * Also send DEVICE_TRAY_MOVED events as appropriate. 
*/ -void blk_dev_change_media_cb(BlockBackend *blk, bool load) +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + Error *local_err = NULL; assert(!blk->legacy_dev); tray_was_open = blk_dev_is_tray_open(blk); - blk->dev_ops->change_media_cb(blk->dev_opaque, load); + blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err); + if (local_err) { + assert(load == true); + error_propagate(errp, local_err); + return; + } tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { @@ -646,7 +740,7 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) static void blk_root_change_media(BdrvChild *child, bool load) { - blk_dev_change_media_cb(child->opaque, load); + blk_dev_change_media_cb(child->opaque, load, NULL); } /* @@ -1605,7 +1699,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset) return -ENOMEDIUM; } - return bdrv_truncate(blk_bs(blk), offset); + return bdrv_truncate(blk->root, offset); } static void blk_pdiscard_entry(void *opaque) diff --git a/block/bochs.c b/block/bochs.c index 8c9652ebeb..516da56c3b 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -104,6 +104,12 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, struct bochs_header bochs; int ret; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + bs->read_only = true; /* no write support yet */ ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); @@ -287,6 +293,7 @@ static BlockDriver bdrv_bochs = { .instance_size = sizeof(BDRVBochsState), .bdrv_probe = bochs_probe, .bdrv_open = bochs_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = bochs_refresh_limits, .bdrv_co_preadv = bochs_co_preadv, .bdrv_close = bochs_close, diff --git a/block/cloop.c b/block/cloop.c index 7b75f7ef7b..a6c7b9dbe6 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -66,6 +66,12 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, uint32_t offsets_size, max_compressed_block_size = 1, i; int ret; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + bs->read_only = true; /* read header */ @@ -284,6 +290,7 @@ static BlockDriver bdrv_cloop = { .instance_size = sizeof(BDRVCloopState), .bdrv_probe = cloop_probe, .bdrv_open = cloop_open, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_refresh_limits = cloop_refresh_limits, .bdrv_co_preadv = cloop_co_preadv, .bdrv_close = cloop_close, diff --git a/block/commit.c b/block/commit.c index c284e8535d..22a0a4db98 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockJob common; RateLimit limit; BlockDriverState *active; + BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; BlockdevOnError on_error; @@ -83,12 +84,23 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); + BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs); int ret = data->ret; + bool remove_commit_top_bs = false; + + /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before + * the normal backing chain can be restored. 
*/ + blk_unref(s->base); if (!block_job_is_cancelled(&s->common) && ret == 0) { /* success */ - ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str); + ret = bdrv_drop_intermediate(active, s->commit_top_bs, base, + s->backing_file_str); + } else if (overlay_bs) { + /* XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + remove_commit_top_bs = true; } /* restore base open flags here if appropriate (e.g., change the base back @@ -102,9 +114,15 @@ static void commit_complete(BlockJob *job, void *opaque) } g_free(s->backing_file_str); blk_unref(s->top); - blk_unref(s->base); block_job_completed(&s->common, ret); g_free(data); + + /* If bdrv_drop_intermediate() didn't already do that, remove the commit + * filter driver from the backing chain. Do this as the final step so that + * the 'consistent read' permission can be granted. */ + if (remove_commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } } static void coroutine_fn commit_run(void *opaque) @@ -208,10 +226,38 @@ static const BlockJobDriver commit_job_driver = { .start = commit_run, }; +static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static void bdrv_commit_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = 0; + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. 
*/ +static BlockDriver bdrv_commit_top = { + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, +}; + void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp) + const char *filter_node_name, Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; @@ -219,7 +265,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, int orig_base_flags; BlockDriverState *iter; BlockDriverState *overlay_bs; + BlockDriverState *commit_top_bs = NULL; Error *local_err = NULL; + int ret; assert(top != bs); if (top == base) { @@ -234,8 +282,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, return; } - s = block_job_create(job_id, &commit_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); + s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } @@ -256,30 +304,70 @@ void commit_start(const char *job_id, BlockDriverState *bs, bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); - block_job_unref(&s->common); - return; + goto fail; } } + /* Insert commit_top block node above top, so we can block consistent read + * on the backing chain below it */ + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0, + errp); + if (commit_top_bs == NULL) { + goto fail; + } + + bdrv_set_backing_hd(commit_top_bs, top, &error_abort); + bdrv_set_backing_hd(overlay_bs, commit_top_bs, &error_abort); + + s->commit_top_bs = commit_top_bs; + bdrv_unref(commit_top_bs); /* Block all nodes between top and base, because they will * disappear from the chain after this operation. */ assert(bdrv_chain_contains(top, base)); - for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + for (iter = top; iter != base; iter = backing_bs(iter)) { + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves + * at s->base (if writes are blocked for a node, they are also blocked + * for its backing file). The other options would be a second filter + * driver above s->base. */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } + } + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } + /* overlay_bs must be blocked because it needs to be modified to - * update the backing image string, but if it's the root node then - * don't block it again */ - if (bs != overlay_bs) { - block_job_add_bdrv(&s->common, overlay_bs); + * update the backing image string. 
*/ + ret = block_job_add_bdrv(&s->common, "overlay of top", overlay_bs, + BLK_PERM_GRAPH_MOD, BLK_PERM_ALL, errp); + if (ret < 0) { + goto fail; } - s->base = blk_new(); - blk_insert_bs(s->base, base); + s->base = blk_new(BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_RESIZE, + BLK_PERM_CONSISTENT_READ + | BLK_PERM_GRAPH_MOD + | BLK_PERM_WRITE_UNCHANGED); + ret = blk_insert_bs(s->base, base, errp); + if (ret < 0) { + goto fail; + } - s->top = blk_new(); - blk_insert_bs(s->top, top); + /* Required permissions are already taken with block_job_add_bdrv() */ + s->top = blk_new(0, BLK_PERM_ALL); + blk_insert_bs(s->top, top, errp); + if (ret < 0) { + goto fail; + } s->active = bs; @@ -292,6 +380,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, trace_commit_start(bs, base, top, s); block_job_start(&s->common); + return; + +fail: + if (s->base) { + blk_unref(s->base); + } + if (s->top) { + blk_unref(s->top); + } + if (commit_top_bs) { + bdrv_set_backing_hd(overlay_bs, top, &error_abort); + } + block_job_unref(&s->common); } @@ -301,11 +402,14 @@ void commit_start(const char *job_id, BlockDriverState *bs, int bdrv_commit(BlockDriverState *bs) { BlockBackend *src, *backing; + BlockDriverState *backing_file_bs = NULL; + BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; uint8_t *buf = NULL; + Error *local_err = NULL; if (!drv) return -ENOMEDIUM; @@ -328,11 +432,33 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(); - blk_insert_bs(src, bs); + src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); - backing = blk_new(); - blk_insert_bs(backing, bs->backing->bs); + ret = blk_insert_bs(src, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } + + /* Insert commit_top block node above backing, so we can write to it */ + backing_file_bs = backing_bs(bs); + + commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, + &local_err); + if (commit_top_bs == NULL) { + error_report_err(local_err); + goto ro_cleanup; + } + + bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); + bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); + + ret = blk_insert_bs(backing, backing_file_bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto ro_cleanup; + } length = blk_getlength(src); if (length < 0) { @@ -404,8 +530,12 @@ int bdrv_commit(BlockDriverState *bs) ro_cleanup: qemu_vfree(buf); - blk_unref(src); blk_unref(backing); + if (backing_file_bs) { + bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); + } + bdrv_unref(commit_top_bs); + blk_unref(src); if (ro) { /* ignoring error return here */ diff --git a/block/crypto.c b/block/crypto.c index 7aa7eb553e..4a2038888d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -300,6 +300,12 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, QCryptoBlockOpenOptions *open_opts = NULL; unsigned int cflags = 0; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -383,7 +389,7 @@ static int block_crypto_truncate(BlockDriverState *bs, int64_t offset) offset += payload_offset; - return bdrv_truncate(bs->file->bs, offset); + return bdrv_truncate(bs->file, offset); } static 
void block_crypto_close(BlockDriverState *bs) @@ -622,6 +628,7 @@ BlockDriver bdrv_crypto_luks = { .bdrv_probe = block_crypto_probe_luks, .bdrv_open = block_crypto_open_luks, .bdrv_close = block_crypto_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = block_crypto_create_luks, .bdrv_truncate = block_crypto_truncate, .create_opts = &block_crypto_create_opts_luks, diff --git a/block/curl.c b/block/curl.c index 2939cc77e9..e83dcd8f50 100644 --- a/block/curl.c +++ b/block/curl.c @@ -135,6 +135,7 @@ typedef struct BDRVCURLState { char *cookie; bool accept_range; AioContext *aio_context; + QemuMutex mutex; char *username; char *password; char *proxyusername; @@ -333,6 +334,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, return FIND_RET_NONE; } +/* Called with s->mutex held. */ static void curl_multi_check_completion(BDRVCURLState *s) { int msgs_in_queue; @@ -374,7 +376,9 @@ static void curl_multi_check_completion(BDRVCURLState *s) continue; } + qemu_mutex_unlock(&s->mutex); acb->common.cb(acb->common.opaque, -EPROTO); + qemu_mutex_lock(&s->mutex); qemu_aio_unref(acb); state->acb[i] = NULL; } @@ -386,6 +390,7 @@ static void curl_multi_check_completion(BDRVCURLState *s) } } +/* Called with s->mutex held. */ static void curl_multi_do_locked(CURLState *s) { CURLSocket *socket, *next_socket; @@ -409,19 +414,19 @@ static void curl_multi_do(void *arg) { CURLState *s = (CURLState *)arg; - aio_context_acquire(s->s->aio_context); + qemu_mutex_lock(&s->s->mutex); curl_multi_do_locked(s); - aio_context_release(s->s->aio_context); + qemu_mutex_unlock(&s->s->mutex); } static void curl_multi_read(void *arg) { CURLState *s = (CURLState *)arg; - aio_context_acquire(s->s->aio_context); + qemu_mutex_lock(&s->s->mutex); curl_multi_do_locked(s); curl_multi_check_completion(s->s); - aio_context_release(s->s->aio_context); + qemu_mutex_unlock(&s->s->mutex); } static void curl_multi_timeout_do(void *arg) @@ -434,11 +439,11 @@ static void curl_multi_timeout_do(void *arg) return; } - aio_context_acquire(s->aio_context); + qemu_mutex_lock(&s->mutex); curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); curl_multi_check_completion(s); - aio_context_release(s->aio_context); + qemu_mutex_unlock(&s->mutex); #else abort(); #endif @@ -771,6 +776,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_cleanup(state->curl); state->curl = NULL; + qemu_mutex_init(&s->mutex); curl_attach_aio_context(bs, bdrv_get_aio_context(bs)); qemu_opts_del(opts); @@ -801,12 +807,11 @@ static void curl_readv_bh_cb(void *p) CURLAIOCB *acb = p; BlockDriverState *bs = acb->common.bs; BDRVCURLState *s = bs->opaque; - AioContext *ctx = bdrv_get_aio_context(bs); size_t start = acb->sector_num * BDRV_SECTOR_SIZE; size_t end; - aio_context_acquire(ctx); + qemu_mutex_lock(&s->mutex); // In case we have the requested data already (e.g. read-ahead), // we can just call the callback and be done. 
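The block/curl.c hunks above, like the block/iscsi.c and block/nfs.c conversions later in this diff, replace the coarse aio_context_acquire()/aio_context_release() pairs with a driver-local QemuMutex and drop that mutex around completion callbacks so the callback may re-enter the driver. A minimal sketch of the pattern follows (illustrative only, not part of the patch; ExampleState and example_fd_handler() are made-up names, while the qemu_mutex_* calls and the unlock-around-callback convention come from the hunks above):

#include "qemu/osdep.h"
#include "qemu/thread.h"

typedef struct ExampleState {
    QemuMutex mutex;        /* protects the library context and request list */
} ExampleState;

static void example_fd_handler(void *opaque)
{
    ExampleState *s = opaque;

    qemu_mutex_lock(&s->mutex);
    /* ... service the library, find completed requests ... */

    /* Drop the lock before invoking a completion callback, as
     * curl_multi_check_completion() now does, so the callback can submit
     * new requests without deadlocking on s->mutex. */
    qemu_mutex_unlock(&s->mutex);
    /* acb->common.cb(acb->common.opaque, ret); */
    qemu_mutex_lock(&s->mutex);

    qemu_mutex_unlock(&s->mutex);
}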
@@ -854,7 +859,7 @@ static void curl_readv_bh_cb(void *p) curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); out: - aio_context_release(ctx); + qemu_mutex_unlock(&s->mutex); if (ret != -EINPROGRESS) { acb->common.cb(acb->common.opaque, ret); qemu_aio_unref(acb); @@ -883,6 +888,7 @@ static void curl_close(BlockDriverState *bs) DPRINTF("CURL: Close\n"); curl_detach_aio_context(bs); + qemu_mutex_destroy(&s->mutex); g_free(s->cookie); g_free(s->url); diff --git a/block/dmg.c b/block/dmg.c index 58a3ae86c1..a7d25fc47b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -413,6 +413,12 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, int64_t offset; int ret; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + block_module_load_one("dmg-bz2"); bs->read_only = true; @@ -691,6 +697,7 @@ static BlockDriver bdrv_dmg = { .bdrv_probe = dmg_probe, .bdrv_open = dmg_open, .bdrv_refresh_limits = dmg_refresh_limits, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = dmg_co_preadv, .bdrv_close = dmg_close, }; diff --git a/block/file-posix.c b/block/file-posix.c index 2134e0ef96..4de1abd023 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1591,18 +1591,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) #endif } - if (ftruncate(fd, total_size) != 0) { - result = -errno; - error_setg_errno(errp, -result, "Could not resize file"); - goto out_close; - } - switch (prealloc) { #ifdef CONFIG_POSIX_FALLOCATE case PREALLOC_MODE_FALLOC: - /* posix_fallocate() doesn't set errno. */ + /* + * Truncating before posix_fallocate() makes it about twice slower on + * file systems that do not support fallocate(), trying to check if a + * block is allocated before allocating it, so don't do that here. + */ result = -posix_fallocate(fd, 0, total_size); if (result != 0) { + /* posix_fallocate() doesn't set errno. */ error_setg_errno(errp, -result, "Could not preallocate data for the new file"); } @@ -1610,6 +1609,17 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) #endif case PREALLOC_MODE_FULL: { + /* + * Knowing the final size from the beginning could allow the file + * system driver to do less allocations and possibly avoid + * fragmentation of the file. + */ + if (ftruncate(fd, total_size) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + goto out_close; + } + int64_t num = 0, left = total_size; buf = g_malloc0(65536); @@ -1636,6 +1646,10 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) break; } case PREALLOC_MODE_OFF: + if (ftruncate(fd, total_size) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); + } break; default: result = -EINVAL; diff --git a/block/io.c b/block/io.c index d5c45447fd..8f38d46de0 100644 --- a/block/io.c +++ b/block/io.c @@ -925,9 +925,11 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); } -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { + BlockDriverState *bs = child->bs; + /* Perform I/O through a temporary buffer so that users who scribble over * their read buffer while the operation is in progress do not end up * modifying the image file. 
This is critical for zero-copy guest I/O @@ -943,6 +945,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, size_t skip_bytes; int ret; + assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE)); + /* Cover entire cluster so no additional backing file I/O is required when * allocating cluster in the image file. */ @@ -1001,10 +1005,11 @@ err: * handles copy on read, zeroing after EOF, and fragmentation of large * reads; any other features must be implemented by the caller. */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; int64_t total_bytes, max_bytes; int ret = 0; uint64_t bytes_remaining = bytes; @@ -1050,7 +1055,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, } if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov); + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov); goto out; } } @@ -1158,7 +1163,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, } tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); - ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); @@ -1306,10 +1311,11 @@ fail: * Forwards an already correctly aligned write request to the BlockDriver, * after possibly fragmenting it. */ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) { + BlockDriverState *bs = child->bs; BlockDriver *drv = bs->drv; bool waited; int ret; @@ -1332,6 +1338,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert(!waited || !req->serialising); assert(req->overlap_offset <= offset); assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); + assert(child->perm & BLK_PERM_WRITE); + assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); @@ -1397,12 +1405,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } -static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, +static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, BdrvRequestFlags flags, BdrvTrackedRequest *req) { + BlockDriverState *bs = child->bs; uint8_t *buf = NULL; QEMUIOVector local_qiov; struct iovec iov; @@ -1430,7 +1439,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1438,7 +1447,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); - ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, align, &local_qiov, flags & 
~BDRV_REQ_ZERO_WRITE); if (ret < 0) { @@ -1452,7 +1461,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, if (bytes >= align) { /* Write the aligned part in the middle. */ uint64_t aligned_bytes = bytes & ~(align - 1); - ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, NULL, flags); if (ret < 0) { goto fail; @@ -1468,7 +1477,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, mark_request_serialising(req, align); wait_serialising_requests(req); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, req, offset, align, + ret = bdrv_aligned_preadv(child, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; @@ -1476,7 +1485,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); - ret = bdrv_aligned_pwritev(bs, req, offset, align, align, + ret = bdrv_aligned_pwritev(child, req, offset, align, align, &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); } fail: @@ -1523,7 +1532,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { - ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req); goto out; } @@ -1542,7 +1551,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&head_qiov, &head_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; @@ -1584,8 +1593,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); + ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), + align, align, &tail_qiov, 0); if (ret < 0) { goto fail; } @@ -1603,7 +1612,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, bytes = ROUND_UP(bytes, align); } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, use_local_qiov ? &local_qiov : qiov, flags); diff --git a/block/iscsi.c b/block/iscsi.c index 2561be90de..76319a1a6e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -58,6 +58,7 @@ typedef struct IscsiLun { int events; QEMUTimer *nop_timer; QEMUTimer *event_timer; + QemuMutex mutex; struct scsi_inquiry_logical_block_provisioning lbp; struct scsi_inquiry_block_limits bl; unsigned char *zeroblock; @@ -252,6 +253,7 @@ static int iscsi_translate_sense(struct scsi_sense *sense) return ret; } +/* Called (via iscsi_service) with QemuMutex held. */ static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -352,6 +354,7 @@ static const AIOCBInfo iscsi_aiocb_info = { static void iscsi_process_read(void *arg); static void iscsi_process_write(void *arg); +/* Called with QemuMutex held. 
*/ static void iscsi_set_events(IscsiLun *iscsilun) { @@ -395,10 +398,10 @@ iscsi_process_read(void *arg) IscsiLun *iscsilun = arg; struct iscsi_context *iscsi = iscsilun->iscsi; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); iscsi_service(iscsi, POLLIN); iscsi_set_events(iscsilun); - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static void @@ -407,10 +410,10 @@ iscsi_process_write(void *arg) IscsiLun *iscsilun = arg; struct iscsi_context *iscsi = iscsilun->iscsi; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); iscsi_service(iscsi, POLLOUT); iscsi_set_events(iscsilun); - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun) @@ -589,6 +592,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, uint64_t lba; uint32_t num_sectors; bool fua = flags & BDRV_REQ_FUA; + int r = 0; if (fua) { assert(iscsilun->dpofua); @@ -604,6 +608,7 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, lba = sector_qemu2lun(sector_num, iscsilun); num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsilun->use_16_for_rw) { #if LIBISCSI_API_VERSION >= (20160603) @@ -640,7 +645,9 @@ retry: #endif while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -655,12 +662,15 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors); - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } @@ -693,18 +703,21 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, goto out; } + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, sector_qemu2lun(sector_num, iscsilun), 8 + 16, iscsi_co_generic_cb, &iTask) == NULL) { ret = -ENOMEM; - goto out; + goto out_unlock; } while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.do_retry) { @@ -721,20 +734,20 @@ retry: * because the device is busy or the cmd is not * supported) we pretend all blocks are allocated * for backwards compatibility */ - goto out; + goto out_unlock; } lbas = scsi_datain_unmarshall(iTask.task); if (lbas == NULL) { ret = -EIO; - goto out; + goto out_unlock; } lbasd = &lbas->descriptors[0]; if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) { ret = -EIO; - goto out; + goto out_unlock; } *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun); @@ -756,6 +769,8 @@ retry: if (*pnum > nb_sectors) { *pnum = nb_sectors; } +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); out: if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -818,6 +833,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsilun->use_16_for_rw) { #if LIBISCSI_API_VERSION >= (20160603) @@ -855,7 +871,9 @@ retry: #endif while (!iTask.complete) { 
iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -867,6 +885,7 @@ retry: iTask.complete = 0; goto retry; } + qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { return iTask.err_code; @@ -881,6 +900,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) struct IscsiTask iTask; iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { @@ -889,7 +909,9 @@ retry: while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -901,6 +923,7 @@ retry: iTask.complete = 0; goto retry; } + qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { return iTask.err_code; @@ -910,6 +933,7 @@ retry: } #ifdef __linux__ +/* Called (via iscsi_service) with QemuMutex held. */ static void iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -1034,6 +1058,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, acb->task->expxferlen = acb->ioh->dxfer_len; data.size = 0; + qemu_mutex_lock(&iscsilun->mutex); if (acb->task->xfer_dir == SCSI_XFER_WRITE) { if (acb->ioh->iovec_count == 0) { data.data = acb->ioh->dxferp; @@ -1049,6 +1074,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, iscsi_aio_ioctl_cb, (data.size > 0) ? &data : NULL, acb) != 0) { + qemu_mutex_unlock(&iscsilun->mutex); scsi_free_scsi_task(acb->task); qemu_aio_unref(acb); return NULL; @@ -1068,6 +1094,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, } iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); return &acb->common; } @@ -1092,6 +1119,7 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; struct unmap_list list; + int r = 0; if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { return -ENOTSUP; @@ -1106,15 +1134,19 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) list.num = count / iscsilun->block_size; iscsi_co_init_iscsitask(iscsilun, &iTask); + qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, iscsi_co_generic_cb, &iTask) == NULL) { - return -ENOMEM; + r = -ENOMEM; + goto out_unlock; } while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.task != NULL) { @@ -1131,17 +1163,20 @@ retry: /* the target might fail with a check condition if it is not happy with the alignment of the UNMAP request we silently fail in this case */ - return 0; + goto out_unlock; } if (iTask.status != SCSI_STATUS_GOOD) { - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } static int @@ -1153,6 +1188,7 @@ coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, uint64_t lba; uint32_t nb_blocks; bool use_16_for_ws = iscsilun->use_16_for_rw; + int r = 0; if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { return -ENOTSUP; @@ -1186,6 +1222,7 @@ 
coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, } } + qemu_mutex_lock(&iscsilun->mutex); iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (use_16_for_ws) { @@ -1205,7 +1242,9 @@ retry: while (!iTask.complete) { iscsi_set_events(iscsilun); + qemu_mutex_unlock(&iscsilun->mutex); qemu_coroutine_yield(); + qemu_mutex_lock(&iscsilun->mutex); } if (iTask.status == SCSI_STATUS_CHECK_CONDITION && @@ -1215,7 +1254,8 @@ retry: /* WRITE SAME is not supported by the target */ iscsilun->has_write_same = false; scsi_free_scsi_task(iTask.task); - return -ENOTSUP; + r = -ENOTSUP; + goto out_unlock; } if (iTask.task != NULL) { @@ -1231,7 +1271,8 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); - return iTask.err_code; + r = iTask.err_code; + goto out_unlock; } if (flags & BDRV_REQ_MAY_UNMAP) { @@ -1242,32 +1283,19 @@ retry: count >> BDRV_SECTOR_BITS); } - return 0; +out_unlock: + qemu_mutex_unlock(&iscsilun->mutex); + return r; } -static void parse_chap(struct iscsi_context *iscsi, const char *target, +static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts, Error **errp) { - QemuOptsList *list; - QemuOpts *opts; const char *user = NULL; const char *password = NULL; const char *secretid; char *secret = NULL; - list = qemu_find_opts("iscsi"); - if (!list) { - return; - } - - opts = qemu_opts_find(list, target); - if (opts == NULL) { - opts = QTAILQ_FIRST(&list->head); - if (!opts) { - return; - } - } - user = qemu_opt_get(opts, "user"); if (!user) { return; @@ -1298,64 +1326,36 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target, g_free(secret); } -static void parse_header_digest(struct iscsi_context *iscsi, const char *target, +static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts, Error **errp) { - QemuOptsList *list; - QemuOpts *opts; const char *digest = NULL; - list = qemu_find_opts("iscsi"); - if (!list) { - return; - } - - opts = qemu_opts_find(list, target); - if (opts == NULL) { - opts = QTAILQ_FIRST(&list->head); - if (!opts) { - return; - } - } - digest = qemu_opt_get(opts, "header-digest"); if (!digest) { - return; - } - - if (!strcmp(digest, "CRC32C")) { + iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); + } else if (!strcmp(digest, "crc32c")) { iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C); - } else if (!strcmp(digest, "NONE")) { + } else if (!strcmp(digest, "none")) { iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE); - } else if (!strcmp(digest, "CRC32C-NONE")) { + } else if (!strcmp(digest, "crc32c-none")) { iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE); - } else if (!strcmp(digest, "NONE-CRC32C")) { + } else if (!strcmp(digest, "none-crc32c")) { iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); } else { error_setg(errp, "Invalid header-digest setting : %s", digest); } } -static char *parse_initiator_name(const char *target) +static char *get_initiator_name(QemuOpts *opts) { - QemuOptsList *list; - QemuOpts *opts; const char *name; char *iscsi_name; UuidInfo *uuid_info; - list = qemu_find_opts("iscsi"); - if (list) { - opts = qemu_opts_find(list, target); - if (!opts) { - opts = QTAILQ_FIRST(&list->head); - } - if (opts) { - name = qemu_opt_get(opts, "initiator-name"); - if (name) { - return g_strdup(name); - } - } + name = qemu_opt_get(opts, "initiator-name"); + if (name) { + return g_strdup(name); } uuid_info = qmp_query_uuid(NULL); @@ -1370,34 
+1370,11 @@ static char *parse_initiator_name(const char *target) return iscsi_name; } -static int parse_timeout(const char *target) -{ - QemuOptsList *list; - QemuOpts *opts; - const char *timeout; - - list = qemu_find_opts("iscsi"); - if (list) { - opts = qemu_opts_find(list, target); - if (!opts) { - opts = QTAILQ_FIRST(&list->head); - } - if (opts) { - timeout = qemu_opt_get(opts, "timeout"); - if (timeout) { - return atoi(timeout); - } - } - } - - return 0; -} - static void iscsi_nop_timed_event(void *opaque) { IscsiLun *iscsilun = opaque; - aio_context_acquire(iscsilun->aio_context); + qemu_mutex_lock(&iscsilun->mutex); if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) { error_report("iSCSI: NOP timeout. Reconnecting..."); iscsilun->request_timed_out = true; @@ -1410,7 +1387,7 @@ static void iscsi_nop_timed_event(void *opaque) iscsi_set_events(iscsilun); out: - aio_context_release(iscsilun->aio_context); + qemu_mutex_unlock(&iscsilun->mutex); } static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) @@ -1483,20 +1460,6 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) } } -/* TODO Convert to fine grained options */ -static QemuOptsList runtime_opts = { - .name = "iscsi", - .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "filename", - .type = QEMU_OPT_STRING, - .help = "URL to the iscsi image", - }, - { /* end of list */ } - }, -}; - static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun, int evpd, int pc, void **inq, Error **errp) { @@ -1614,24 +1577,178 @@ out: } } +static void iscsi_parse_iscsi_option(const char *target, QDict *options) +{ + QemuOptsList *list; + QemuOpts *opts; + const char *user, *password, *password_secret, *initiator_name, + *header_digest, *timeout; + + list = qemu_find_opts("iscsi"); + if (!list) { + return; + } + + opts = qemu_opts_find(list, target); + if (opts == NULL) { + opts = QTAILQ_FIRST(&list->head); + if (!opts) { + return; + } + } + + user = qemu_opt_get(opts, "user"); + if (user) { + qdict_set_default_str(options, "user", user); + } + + password = qemu_opt_get(opts, "password"); + if (password) { + qdict_set_default_str(options, "password", password); + } + + password_secret = qemu_opt_get(opts, "password-secret"); + if (password_secret) { + qdict_set_default_str(options, "password-secret", password_secret); + } + + initiator_name = qemu_opt_get(opts, "initiator-name"); + if (initiator_name) { + qdict_set_default_str(options, "initiator-name", initiator_name); + } + + header_digest = qemu_opt_get(opts, "header-digest"); + if (header_digest) { + /* -iscsi takes upper case values, but QAPI only supports lower case + * enum constant names, so we have to convert here. 
*/ + char *qapi_value = g_ascii_strdown(header_digest, -1); + qdict_set_default_str(options, "header-digest", qapi_value); + g_free(qapi_value); + } + + timeout = qemu_opt_get(opts, "timeout"); + if (timeout) { + qdict_set_default_str(options, "timeout", timeout); + } +} + /* * We support iscsi url's on the form * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun> */ +static void iscsi_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + struct iscsi_url *iscsi_url; + const char *transport_name; + char *lun_str; + + iscsi_url = iscsi_parse_full_url(NULL, filename); + if (iscsi_url == NULL) { + error_setg(errp, "Failed to parse URL : %s", filename); + return; + } + +#if LIBISCSI_API_VERSION >= (20160603) + switch (iscsi_url->transport) { + case TCP_TRANSPORT: + transport_name = "tcp"; + break; + case ISER_TRANSPORT: + transport_name = "iser"; + break; + default: + error_setg(errp, "Unknown transport type (%d)", + iscsi_url->transport); + return; + } +#else + transport_name = "tcp"; +#endif + + qdict_set_default_str(options, "transport", transport_name); + qdict_set_default_str(options, "portal", iscsi_url->portal); + qdict_set_default_str(options, "target", iscsi_url->target); + + lun_str = g_strdup_printf("%d", iscsi_url->lun); + qdict_set_default_str(options, "lun", lun_str); + g_free(lun_str); + + /* User/password from -iscsi take precedence over those from the URL */ + iscsi_parse_iscsi_option(iscsi_url->target, options); + + if (iscsi_url->user[0] != '\0') { + qdict_set_default_str(options, "user", iscsi_url->user); + qdict_set_default_str(options, "password", iscsi_url->passwd); + } + + iscsi_destroy_url(iscsi_url); +} + +static QemuOptsList runtime_opts = { + .name = "iscsi", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "transport", + .type = QEMU_OPT_STRING, + }, + { + .name = "portal", + .type = QEMU_OPT_STRING, + }, + { + .name = "target", + .type = QEMU_OPT_STRING, + }, + { + .name = "user", + .type = QEMU_OPT_STRING, + }, + { + .name = "password", + .type = QEMU_OPT_STRING, + }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + }, + { + .name = "lun", + .type = QEMU_OPT_NUMBER, + }, + { + .name = "initiator-name", + .type = QEMU_OPT_STRING, + }, + { + .name = "header-digest", + .type = QEMU_OPT_STRING, + }, + { + .name = "timeout", + .type = QEMU_OPT_NUMBER, + }, + { /* end of list */ } + }, +}; + static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { IscsiLun *iscsilun = bs->opaque; struct iscsi_context *iscsi = NULL; - struct iscsi_url *iscsi_url = NULL; struct scsi_task *task = NULL; struct scsi_inquiry_standard *inq = NULL; struct scsi_inquiry_supported_pages *inq_vpd; char *initiator_name = NULL; QemuOpts *opts; Error *local_err = NULL; - const char *filename; - int i, ret = 0, timeout = 0; + const char *transport_name, *portal, *target; +#if LIBISCSI_API_VERSION >= (20160603) + enum iscsi_transport_type transport; +#endif + int i, ret = 0, timeout = 0, lun; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -1641,18 +1758,34 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } - filename = qemu_opt_get(opts, "filename"); + transport_name = qemu_opt_get(opts, "transport"); + portal = qemu_opt_get(opts, "portal"); + target = qemu_opt_get(opts, "target"); + lun = qemu_opt_get_number(opts, "lun", 0); - iscsi_url = iscsi_parse_full_url(iscsi, filename); - if 
(iscsi_url == NULL) { - error_setg(errp, "Failed to parse URL : %s", filename); + if (!transport_name || !portal || !target) { + error_setg(errp, "Need all of transport, portal and target options"); + ret = -EINVAL; + goto out; + } + + if (!strcmp(transport_name, "tcp")) { +#if LIBISCSI_API_VERSION >= (20160603) + transport = TCP_TRANSPORT; + } else if (!strcmp(transport_name, "iser")) { + transport = ISER_TRANSPORT; +#else + /* TCP is what older libiscsi versions always use */ +#endif + } else { + error_setg(errp, "Unknown transport: %s", transport_name); ret = -EINVAL; goto out; } memset(iscsilun, 0, sizeof(IscsiLun)); - initiator_name = parse_initiator_name(iscsi_url->target); + initiator_name = get_initiator_name(opts); iscsi = iscsi_create_context(initiator_name); if (iscsi == NULL) { @@ -1661,30 +1794,20 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } #if LIBISCSI_API_VERSION >= (20160603) - if (iscsi_init_transport(iscsi, iscsi_url->transport)) { + if (iscsi_init_transport(iscsi, transport)) { error_setg(errp, ("Error initializing transport.")); ret = -EINVAL; goto out; } #endif - if (iscsi_set_targetname(iscsi, iscsi_url->target)) { + if (iscsi_set_targetname(iscsi, target)) { error_setg(errp, "iSCSI: Failed to set target name."); ret = -EINVAL; goto out; } - if (iscsi_url->user[0] != '\0') { - ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user, - iscsi_url->passwd); - if (ret != 0) { - error_setg(errp, "Failed to set initiator username and password"); - ret = -EINVAL; - goto out; - } - } - /* check if we got CHAP username/password via the options */ - parse_chap(iscsi, iscsi_url->target, &local_err); + apply_chap(iscsi, opts, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); ret = -EINVAL; @@ -1697,10 +1820,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } - iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C); - /* check if we got HEADER_DIGEST via the options */ - parse_header_digest(iscsi, iscsi_url->target, &local_err); + apply_header_digest(iscsi, opts, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); ret = -EINVAL; @@ -1708,7 +1829,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, } /* timeout handling is broken in libiscsi before 1.15.0 */ - timeout = parse_timeout(iscsi_url->target); + timeout = qemu_opt_get_number(opts, "timeout", 0); #if LIBISCSI_API_VERSION >= 20150621 iscsi_set_timeout(iscsi, timeout); #else @@ -1717,7 +1838,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, } #endif - if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) { + if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) { error_setg(errp, "iSCSI: Failed to connect to LUN : %s", iscsi_get_error(iscsi)); ret = -EINVAL; @@ -1726,7 +1847,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, iscsilun->iscsi = iscsi; iscsilun->aio_context = bdrv_get_aio_context(bs); - iscsilun->lun = iscsi_url->lun; + iscsilun->lun = lun; iscsilun->has_write_same = true; task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0, @@ -1812,6 +1933,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, scsi_free_scsi_task(task); task = NULL; + qemu_mutex_init(&iscsilun->mutex); iscsi_attach_aio_context(bs, iscsilun->aio_context); /* Guess the internal cluster (page) size of the iscsi target by the means @@ -1829,9 +1951,6 @@ static int 
iscsi_open(BlockDriverState *bs, QDict *options, int flags, out: qemu_opts_del(opts); g_free(initiator_name); - if (iscsi_url != NULL) { - iscsi_destroy_url(iscsi_url); - } if (task != NULL) { scsi_free_scsi_task(task); } @@ -1860,6 +1979,7 @@ static void iscsi_close(BlockDriverState *bs) iscsi_destroy_context(iscsi); g_free(iscsilun->zeroblock); iscsi_allocmap_free(iscsilun); + qemu_mutex_destroy(&iscsilun->mutex); memset(iscsilun, 0, sizeof(IscsiLun)); } @@ -2040,15 +2160,15 @@ static BlockDriver bdrv_iscsi = { .format_name = "iscsi", .protocol_name = "iscsi", - .instance_size = sizeof(IscsiLun), - .bdrv_needs_filename = true, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, - .bdrv_create = iscsi_create, - .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_invalidate_cache = iscsi_invalidate_cache, + .instance_size = sizeof(IscsiLun), + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, + .bdrv_create = iscsi_create, + .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_invalidate_cache = iscsi_invalidate_cache, .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, @@ -2075,15 +2195,15 @@ static BlockDriver bdrv_iser = { .format_name = "iser", .protocol_name = "iser", - .instance_size = sizeof(IscsiLun), - .bdrv_needs_filename = true, - .bdrv_file_open = iscsi_open, - .bdrv_close = iscsi_close, - .bdrv_create = iscsi_create, - .create_opts = &iscsi_create_opts, - .bdrv_reopen_prepare = iscsi_reopen_prepare, - .bdrv_reopen_commit = iscsi_reopen_commit, - .bdrv_invalidate_cache = iscsi_invalidate_cache, + .instance_size = sizeof(IscsiLun), + .bdrv_parse_filename = iscsi_parse_filename, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, + .bdrv_create = iscsi_create, + .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_invalidate_cache = iscsi_invalidate_cache, .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, diff --git a/block/mirror.c b/block/mirror.c index 698a54e50f..57f26c33a4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -38,7 +38,10 @@ typedef struct MirrorBlockJob { BlockJob common; RateLimit limit; BlockBackend *target; + BlockDriverState *mirror_top_bs; + BlockDriverState *source; BlockDriverState *base; + /* The name of the graph node to replace */ char *replaces; /* The BDS to replace */ @@ -69,6 +72,7 @@ typedef struct MirrorBlockJob { bool waiting_for_io; int target_cluster_sectors; int max_iov; + bool initial_zeroing_ongoing; } MirrorBlockJob; typedef struct MirrorOp { @@ -117,9 +121,10 @@ static void mirror_iteration_done(MirrorOp *op, int ret) if (s->cow_bitmap) { bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); } - s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE; + if (!s->initial_zeroing_ongoing) { + s->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE; + } } - qemu_iovec_destroy(&op->qiov); g_free(op); @@ -325,7 +330,7 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s, static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) { - BlockDriverState *source = blk_bs(s->common.blk); + BlockDriverState *source = s->source; int64_t sector_num, first_chunk; uint64_t delay_ns = 0; /* At least the first dirty chunk is mirrored in one iteration. 
*/ @@ -384,7 +389,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) nb_chunks * sectors_per_chunk); bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); while (nb_chunks > 0 && sector_num < end) { - int ret; + int64_t ret; int io_sectors, io_sectors_acct; BlockDriverState *file; enum MirrorMethod { @@ -495,12 +500,30 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; - BlockDriverState *src = blk_bs(s->common.blk); + BlockDriverState *src = s->source; BlockDriverState *target_bs = blk_bs(s->target); + BlockDriverState *mirror_top_bs = s->mirror_top_bs; + Error *local_err = NULL; /* Make sure that the source BDS doesn't go away before we called * block_job_completed(). */ bdrv_ref(src); + bdrv_ref(mirror_top_bs); + + /* We don't access the source any more. Dropping any WRITE/RESIZE is + * required before it could become a backing file of target_bs. */ + bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + BlockDriverState *backing = s->is_none_mode ? src : s->base; + if (backing_bs(target_bs) != backing) { + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + } + } + } if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -522,10 +545,6 @@ static void mirror_exit(BlockJob *job, void *opaque) bdrv_drained_begin(target_bs); bdrv_replace_in_backing_chain(to_replace, target_bs); bdrv_drained_end(target_bs); - - /* We just changed the BDS the job BB refers to */ - blk_remove_bs(job->blk); - blk_insert_bs(job->blk, src); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -538,9 +557,26 @@ static void mirror_exit(BlockJob *job, void *opaque) g_free(s->replaces); blk_unref(s->target); s->target = NULL; + + /* Remove the mirror filter driver from the graph. Before this, get rid of + * the blockers on the intermediate nodes so that the resulting state is + * valid. */ + block_job_remove_all_bdrv(job); + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); + + /* We just changed the BDS the job BB refers to (with either or both of the + * bdrv_replace_in_backing_chain() calls), so switch the BB back so the + * cleanup does the right thing. We don't need any permissions any more + * now. 
*/ + blk_remove_bs(job->blk); + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); + blk_insert_bs(job->blk, mirror_top_bs, &error_abort); + block_job_completed(&s->common, data->ret); + g_free(data); bdrv_drained_end(src); + bdrv_unref(mirror_top_bs); bdrv_unref(src); } @@ -560,7 +596,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) { int64_t sector_num, end; BlockDriverState *base = s->base; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); int ret, n; @@ -572,6 +608,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } + s->initial_zeroing_ongoing = true; for (sector_num = 0; sector_num < end; ) { int nb_sectors = MIN(end - sector_num, QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS); @@ -579,6 +616,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) mirror_throttle(s); if (block_job_is_cancelled(&s->common)) { + s->initial_zeroing_ongoing = false; return 0; } @@ -593,6 +631,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) } mirror_wait_for_all_io(s); + s->initial_zeroing_ongoing = false; } /* First part, loop on the sectors and initialize the dirty bitmap. */ @@ -639,7 +678,7 @@ static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; - BlockDriverState *bs = blk_bs(s->common.blk); + BlockDriverState *bs = s->source; BlockDriverState *target_bs = blk_bs(s->target); bool need_drain = true; int64_t length; @@ -657,7 +696,28 @@ static void coroutine_fn mirror_run(void *opaque) if (s->bdev_length < 0) { ret = s->bdev_length; goto immediate_exit; - } else if (s->bdev_length == 0) { + } + + /* Active commit must resize the base image if its size differs from the + * active layer. */ + if (s->base == blk_bs(s->target)) { + int64_t base_length; + + base_length = blk_getlength(s->target); + if (base_length < 0) { + ret = base_length; + goto immediate_exit; + } + + if (s->bdev_length > base_length) { + ret = blk_truncate(s->target, s->bdev_length); + if (ret < 0) { + goto immediate_exit; + } + } + } + + if (s->bdev_length == 0) { /* Report BLOCK_JOB_READY and wait for complete. */ block_job_event_ready(&s->common); s->synced = true; @@ -850,9 +910,8 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - BlockDriverState *src, *target; + BlockDriverState *target; - src = blk_bs(job->blk); target = blk_bs(s->target); if (!s->synced) { @@ -884,6 +943,10 @@ static void mirror_complete(BlockJob *job, Error **errp) replace_aio_context = bdrv_get_aio_context(s->to_replace); aio_context_acquire(replace_aio_context); + /* TODO Translate this into permission system. Current definition of + * GRAPH_MOD would require to request it for the parents; they might + * not even be BlockDriverStates, however, so a BdrvChild can't address + * them. May need redefinition of GRAPH_MOD. */ error_setg(&s->replace_blocker, "block device is in use by block-job-complete"); bdrv_op_block_all(s->to_replace, s->replace_blocker); @@ -892,13 +955,6 @@ static void mirror_complete(BlockJob *job, Error **errp) aio_context_release(replace_aio_context); } - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { - BlockDriverState *backing = s->is_none_mode ? 
src : s->base; - if (backing_bs(target) != backing) { - bdrv_set_backing_hd(target, backing); - } - } - s->should_complete = true; block_job_enter(&s->common); } @@ -954,6 +1010,77 @@ static const BlockJobDriver commit_active_job_driver = { .drain = mirror_drain, }; +static int coroutine_fn bdrv_mirror_top_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) +{ + return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags); +} + +static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs) +{ + return bdrv_co_flush(bs->backing->bs); +} + +static int64_t coroutine_fn bdrv_mirror_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + +static int coroutine_fn bdrv_mirror_top_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int count, BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->backing, offset, count, flags); +} + +static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, + int64_t offset, int count) +{ + return bdrv_co_pdiscard(bs->backing->bs, offset, count); +} + +static void bdrv_mirror_top_close(BlockDriverState *bs) +{ +} + +static void bdrv_mirror_top_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + /* Must be able to forward guest writes to the real image */ + *nperm = 0; + if (perm & BLK_PERM_WRITE) { + *nperm |= BLK_PERM_WRITE; + } + + *nshared = BLK_PERM_ALL; +} + +/* Dummy node that provides consistent read to its users without requiring it + * from its backing file and that allows writes on the backing file chain. 
*/ +static BlockDriver bdrv_mirror_top = { + .format_name = "mirror_top", + .bdrv_co_preadv = bdrv_mirror_top_preadv, + .bdrv_co_pwritev = bdrv_mirror_top_pwritev, + .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, + .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, + .bdrv_co_flush = bdrv_mirror_top_flush, + .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_close = bdrv_mirror_top_close, + .bdrv_child_perm = bdrv_mirror_top_child_perm, +}; + static void mirror_start_job(const char *job_id, BlockDriverState *bs, int creation_flags, BlockDriverState *target, const char *replaces, int64_t speed, @@ -966,9 +1093,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, void *opaque, Error **errp, const BlockJobDriver *driver, bool is_none_mode, BlockDriverState *base, - bool auto_complete) + bool auto_complete, const char *filter_node_name) { MirrorBlockJob *s; + BlockDriverState *mirror_top_bs; + bool target_graph_mod; + bool target_is_backing; + Error *local_err = NULL; + int ret; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -985,14 +1117,62 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, creation_flags, - cb, opaque, errp); - if (!s) { + /* In the case of active commit, add dummy driver to provide consistent + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. */ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, + BDRV_O_RDWR, errp); + if (mirror_top_bs == NULL) { return; } + mirror_top_bs->total_sectors = bs->total_sectors; + + /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep + * it alive until block_job_create() even if bs has no parent. */ + bdrv_ref(mirror_top_bs); + bdrv_drained_begin(bs); + bdrv_append(mirror_top_bs, bs, &local_err); + bdrv_drained_end(bs); - s->target = blk_new(); - blk_insert_bs(s->target, target); + if (local_err) { + bdrv_unref(mirror_top_bs); + error_propagate(errp, local_err); + return; + } + + /* Make sure that the source is not resized while the job is running */ + s = block_job_create(job_id, driver, mirror_top_bs, + BLK_PERM_CONSISTENT_READ, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed, + creation_flags, cb, opaque, errp); + bdrv_unref(mirror_top_bs); + if (!s) { + goto fail; + } + s->source = bs; + s->mirror_top_bs = mirror_top_bs; + + /* No resize for the target either; while the mirror is still running, a + * consistent read isn't necessarily possible. We could possibly allow + * writes and graph modifications, though it would likely defeat the + * purpose of a mirror, so leave them blocked for now. + * + * In the case of active commit, things look a bit different, though, + * because the target is an already populated backing file in active use. + * We can allow anything except resize there.*/ + target_is_backing = bdrv_chain_contains(bs, target); + target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); + s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), + BLK_PERM_WRITE_UNCHANGED | + (target_is_backing ? 
BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE | + BLK_PERM_GRAPH_MOD : 0)); + ret = blk_insert_bs(s->target, target, errp); + if (ret < 0) { + goto fail; + } s->replaces = g_strdup(replaces); s->on_source_error = on_source_error; @@ -1015,18 +1195,40 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - block_job_add_bdrv(&s->common, target); + /* Required permissions are already taken with blk_new() */ + block_job_add_bdrv(&s->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); + /* In commit_active_start() all intermediate nodes disappear, so * any jobs in them must be blocked */ - if (bdrv_chain_contains(bs, target)) { + if (target_is_backing) { BlockDriverState *iter; for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + /* XXX BLK_PERM_WRITE needs to be allowed so we don't block + * ourselves at s->base (if writes are blocked for a node, they are + * also blocked for its backing file). The other options would be a + * second filter driver above s->base (== target). */ + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, + errp); + if (ret < 0) { + goto fail; + } } } trace_mirror_start(bs, s, opaque); block_job_start(&s->common); + return; + +fail: + if (s) { + g_free(s->replaces); + blk_unref(s->target); + block_job_unref(&s->common); + } + + bdrv_replace_in_backing_chain(mirror_top_bs, backing_bs(mirror_top_bs)); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -1035,7 +1237,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp) + bool unmap, const char *filter_node_name, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -1049,18 +1251,18 @@ void mirror_start(const char *job_id, BlockDriverState *bs, mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, errp, - &mirror_job_driver, is_none_mode, base, false); + &mirror_job_driver, is_none_mode, base, false, + filter_node_name); } void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, + const char *filter_node_name, BlockCompletionFunc *cb, void *opaque, Error **errp, bool auto_complete) { - int64_t length, base_length; int orig_base_flags; - int ret; Error *local_err = NULL; orig_base_flags = bdrv_get_flags(base); @@ -1069,35 +1271,11 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, return; } - length = bdrv_getlength(bs); - if (length < 0) { - error_setg_errno(errp, -length, - "Unable to determine length of %s", bs->filename); - goto error_restore_flags; - } - - base_length = bdrv_getlength(base); - if (base_length < 0) { - error_setg_errno(errp, -base_length, - "Unable to determine length of %s", base->filename); - goto error_restore_flags; - } - - if (length > base_length) { - ret = bdrv_truncate(base, length); - if (ret < 0) { - error_setg_errno(errp, -ret, - "Top image %s is larger than base image %s, and " - "resize of base image failed", - bs->filename, base->filename); - goto error_restore_flags; - } - } - mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, on_error, on_error, true, cb, opaque, &local_err, - 
&commit_active_job_driver, false, base, auto_complete); + &commit_active_job_driver, false, base, auto_complete, + filter_node_name); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/nbd.c b/block/nbd.c index 35f24be069..a7f9108fe5 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -537,8 +537,6 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort); visit_complete(ov, &saddr_qdict); visit_free(ov); - assert(qobject_type(saddr_qdict) == QTYPE_QDICT); - qdict_put_obj(opts, "server", saddr_qdict); if (s->export) { diff --git a/block/nfs.c b/block/nfs.c index 08b43dd189..890d5d4aff 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -54,6 +54,7 @@ typedef struct NFSClient { int events; bool has_zero_init; AioContext *aio_context; + QemuMutex mutex; blkcnt_t st_blocks; bool cache_used; NFSServer *server; @@ -191,6 +192,7 @@ static void nfs_parse_filename(const char *filename, QDict *options, static void nfs_process_read(void *arg); static void nfs_process_write(void *arg); +/* Called with QemuMutex held. */ static void nfs_set_events(NFSClient *client) { int ev = nfs_which_events(client->context); @@ -209,20 +211,20 @@ static void nfs_process_read(void *arg) { NFSClient *client = arg; - aio_context_acquire(client->aio_context); + qemu_mutex_lock(&client->mutex); nfs_service(client->context, POLLIN); nfs_set_events(client); - aio_context_release(client->aio_context); + qemu_mutex_unlock(&client->mutex); } static void nfs_process_write(void *arg) { NFSClient *client = arg; - aio_context_acquire(client->aio_context); + qemu_mutex_lock(&client->mutex); nfs_service(client->context, POLLOUT); nfs_set_events(client); - aio_context_release(client->aio_context); + qemu_mutex_unlock(&client->mutex); } static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) @@ -242,6 +244,7 @@ static void nfs_co_generic_bh_cb(void *opaque) aio_co_wake(task->co); } +/* Called (via nfs_service) with QemuMutex held. 
*/ static void nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, void *private_data) @@ -263,9 +266,9 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, nfs_co_generic_bh_cb, task); } -static int coroutine_fn nfs_co_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) +static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *iov, + int flags) { NFSClient *client = bs->opaque; NFSRPC task; @@ -273,14 +276,15 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs, nfs_co_init_task(bs, &task); task.iov = iov; + qemu_mutex_lock(&client->mutex); if (nfs_pread_async(client->context, client->fh, - sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, - nfs_co_generic_cb, &task) != 0) { + offset, bytes, nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); return -ENOMEM; } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } @@ -297,39 +301,50 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs, return 0; } -static int coroutine_fn nfs_co_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) +static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *iov, + int flags) { NFSClient *client = bs->opaque; NFSRPC task; char *buf = NULL; + bool my_buffer = false; nfs_co_init_task(bs, &task); - buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE); - if (nb_sectors && buf == NULL) { - return -ENOMEM; + if (iov->niov != 1) { + buf = g_try_malloc(bytes); + if (bytes && buf == NULL) { + return -ENOMEM; + } + qemu_iovec_to_buf(iov, 0, buf, bytes); + my_buffer = true; + } else { + buf = iov->iov[0].iov_base; } - qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE); - + qemu_mutex_lock(&client->mutex); if (nfs_pwrite_async(client->context, client->fh, - sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE, - buf, nfs_co_generic_cb, &task) != 0) { - g_free(buf); + offset, bytes, buf, + nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); + if (my_buffer) { + g_free(buf); + } return -ENOMEM; } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } - g_free(buf); + if (my_buffer) { + g_free(buf); + } - if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) { + if (task.ret != bytes) { return task.ret < 0 ? task.ret : -EIO; } @@ -343,12 +358,15 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs) nfs_co_init_task(bs, &task); + qemu_mutex_lock(&client->mutex); if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &task) != 0) { + qemu_mutex_unlock(&client->mutex); return -ENOMEM; } nfs_set_events(client); + qemu_mutex_unlock(&client->mutex); while (!task.complete) { qemu_coroutine_yield(); } @@ -434,6 +452,7 @@ static void nfs_file_close(BlockDriverState *bs) { NFSClient *client = bs->opaque; nfs_client_close(client); + qemu_mutex_destroy(&client->mutex); } static NFSServer *nfs_config(QDict *options, Error **errp) @@ -641,6 +660,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, if (ret < 0) { return ret; } + qemu_mutex_init(&client->mutex); bs->total_sectors = ret; ret = 0; return ret; @@ -696,6 +716,7 @@ static int nfs_has_zero_init(BlockDriverState *bs) return client->has_zero_init; } +/* Called (via nfs_service) with QemuMutex held. 
*/ static void nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data, void *private_data) @@ -805,8 +826,6 @@ static void nfs_refresh_filename(BlockDriverState *bs, QDict *options) ov = qobject_output_visitor_new(&server_qdict); visit_type_NFSServer(ov, NULL, &client->server, &error_abort); visit_complete(ov, &server_qdict); - assert(qobject_type(server_qdict) == QTYPE_QDICT); - qdict_put_obj(opts, "server", server_qdict); qdict_put(opts, "path", qstring_from_str(client->path)); @@ -863,8 +882,8 @@ static BlockDriver bdrv_nfs = { .bdrv_create = nfs_file_create, .bdrv_reopen_prepare = nfs_reopen_prepare, - .bdrv_co_readv = nfs_co_readv, - .bdrv_co_writev = nfs_co_writev, + .bdrv_co_preadv = nfs_co_preadv, + .bdrv_co_pwritev = nfs_co_pwritev, .bdrv_co_flush_to_disk = nfs_co_flush, .bdrv_detach_aio_context = nfs_detach_aio_context, diff --git a/block/parallels.c b/block/parallels.c index 2ccefa7d85..19935e29a9 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -215,7 +215,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->data_end << BDRV_SECTOR_BITS, space << BDRV_SECTOR_BITS, 0); } else { - ret = bdrv_truncate(bs->file->bs, + ret = bdrv_truncate(bs->file, (s->data_end + space) << BDRV_SECTOR_BITS); } if (ret < 0) { @@ -449,7 +449,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, size - res->image_end_offset); res->leaks += count; if (fix & BDRV_FIX_LEAKS) { - ret = bdrv_truncate(bs->file->bs, res->image_end_offset); + ret = bdrv_truncate(bs->file, res->image_end_offset); if (ret < 0) { res->check_errors++; return ret; @@ -488,7 +488,8 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } file = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (file == NULL) { error_propagate(errp, local_err); return -EIO; @@ -581,6 +582,12 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; char *buf; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); if (ret < 0) { goto fail; @@ -681,7 +688,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail_options; } if (!bdrv_has_zero_init(bs->file->bs) || - bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) { + bdrv_truncate(bs->file, bdrv_getlength(bs->file->bs)) != 0) { s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; } @@ -724,7 +731,7 @@ static void parallels_close(BlockDriverState *bs) } if (bs->open_flags & BDRV_O_RDWR) { - bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS); + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS); } g_free(s->bat_dirty_bmap); @@ -756,6 +763,7 @@ static BlockDriver bdrv_parallels = { .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_flush_to_os = parallels_co_flush_to_os, diff --git a/block/qapi.c b/block/qapi.c index ac480aa93c..a40922ea26 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -682,7 +682,6 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); visit_complete(v, &obj); - 
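[Editor's note] The block/nfs.c hunks above replace the AioContext lock with a per-client QemuMutex taken around every libnfs call, and switch the driver to byte-based preadv/pwritev. A condensed sketch of the resulting request pattern, taken from the new nfs_co_flush() with the boilerplate trimmed (the trailing return is assumed unchanged from the pre-existing code, which this hunk does not show):

static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
{
    NFSClient *client = bs->opaque;
    NFSRPC task;

    nfs_co_init_task(bs, &task);

    /* The libnfs context is only touched with client->mutex held; the
     * completion callback also runs under the mutex via nfs_service(). */
    qemu_mutex_lock(&client->mutex);
    if (nfs_fsync_async(client->context, client->fh,
                        nfs_co_generic_cb, &task) != 0) {
        qemu_mutex_unlock(&client->mutex);
        return -ENOMEM;
    }
    nfs_set_events(client);
    qemu_mutex_unlock(&client->mutex);

    /* Yield until nfs_co_generic_bh_cb() wakes the coroutine. */
    while (!task.complete) {
        qemu_coroutine_yield();
    }

    return task.ret;
}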
assert(qobject_type(obj) == QTYPE_QDICT); data = qdict_get(qobject_to_qdict(obj), "data"); dump_qobject(func_fprintf, f, 1, data); qobject_decref(obj); diff --git a/block/qcow.c b/block/qcow.c index fb738fc507..9d6ac83959 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -106,6 +106,12 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, QCowHeader header; Error *local_err = NULL; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { goto fail; @@ -467,7 +473,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, /* round to cluster size */ cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size); + bdrv_truncate(bs->file, cluster_offset + s->cluster_size); /* if encrypted, we must initialize the cluster content which won't be written */ if (bs->encrypted && @@ -817,7 +823,8 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } qcow_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (qcow_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -909,7 +916,7 @@ static int qcow_make_empty(BlockDriverState *bs) if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0) return -1; - ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length); + ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); if (ret < 0) return ret; @@ -1046,6 +1053,7 @@ static BlockDriver bdrv_qcow = { .bdrv_probe = qcow_probe, .bdrv_open = qcow_open, .bdrv_close = qcow_close, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 3dbde18612..9e96f64c8b 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1734,7 +1734,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, goto resize_fail; } - ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size); + ret = bdrv_truncate(bs->file, offset + s->cluster_size); if (ret < 0) { goto resize_fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 3e274bd1ba..6a92d2ef3f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -814,8 +814,8 @@ static int qcow2_update_options(BlockDriverState *bs, QDict *options, return ret; } -static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) +static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { BDRVQcow2State *s = bs->opaque; unsigned int len, i; @@ -1205,6 +1205,18 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; } +static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + return qcow2_do_open(bs, options, flags, errp); +} + static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVQcow2State *s = bs->opaque; @@ -1785,7 +1797,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) options = qdict_clone_shallow(bs->options); flags &= ~BDRV_O_INACTIVE; - ret = qcow2_open(bs, options, flags, &local_err); + ret 
= qcow2_do_open(bs, options, flags, &local_err); QDECREF(options); if (local_err) { error_propagate(errp, local_err); @@ -2190,7 +2202,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -2254,7 +2267,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); blk = blk_new_open(filename, NULL, options, - BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2570,7 +2584,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, /* align end of file to a sector boundary to ease reading with sector based I/Os */ cluster_offset = bdrv_getlength(bs->file->bs); - return bdrv_truncate(bs->file->bs, cluster_offset); + return bdrv_truncate(bs->file, cluster_offset); } buf = qemu_blockalign(bs, s->cluster_size); @@ -2784,7 +2798,7 @@ static int make_completely_empty(BlockDriverState *bs) goto fail; } - ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size); + ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size); if (ret < 0) { goto fail; } @@ -3101,6 +3115,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, uint64_t cluster_size = s->cluster_size; bool encrypt; int refcount_bits = s->refcount_bits; + Error *local_err = NULL; int ret; QemuOptDesc *desc = opts->list->desc; Qcow2AmendHelperCBInfo helper_cb_info; @@ -3250,7 +3265,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - ret = bdrv_truncate(bs, new_size); + BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + blk_unref(blk); + return ret; + } + + ret = blk_truncate(blk, new_size); + blk_unref(blk); if (ret < 0) { return ret; } @@ -3387,6 +3411,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_join_options = qcow2_join_options, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, diff --git a/block/qed.c b/block/qed.c index 0b62c7799e..5ec7fd83f2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -415,8 +415,8 @@ static void bdrv_qed_drain(BlockDriverState *bs) } } -static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) +static int bdrv_qed_do_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) { BDRVQEDState *s = bs->opaque; QEDHeader le_header; @@ -550,6 +550,18 @@ out: return ret; } +static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + return bdrv_qed_do_open(bs, options, flags, errp); +} + static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVQEDState *s = bs->opaque; @@ -613,7 +625,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | 
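[Editor's note] The qcow/qcow2 hunks above (and the qed, vdi, vhdx, vmdk and vpc ones below) all switch format drivers to attaching their own "file" child and registering a .bdrv_child_perm callback. A hedged sketch of the shape the converted drivers now share, using the names from the qcow2 hunks and not a complete driver:

static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
                      Error **errp)
{
    /* The format driver now attaches its protocol child itself. */
    bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                               false, errp);
    if (!bs->file) {
        return -EINVAL;
    }

    /* Header parsing lives in qcow2_do_open(), which invalidate_cache
     * can call again later without re-attaching the child. */
    return qcow2_do_open(bs, options, flags, errp);
}

In the BlockDriver table this is paired with ".bdrv_child_perm = bdrv_format_default_perms" (filter drivers such as raw and replication use bdrv_filter_default_perms instead), which appears to be the groundwork for the new child-permission system used elsewhere in this series.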
BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); return -EIO; @@ -1629,7 +1642,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp) bdrv_qed_close(bs); memset(s, 0, sizeof(BDRVQEDState)); - ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err); + ret = bdrv_qed_do_open(bs, NULL, bs->open_flags, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "Could not reopen qed layer: "); @@ -1692,6 +1705,7 @@ static BlockDriver bdrv_qed = { .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, diff --git a/block/quorum.c b/block/quorum.c index 86e2072dce..40205fb1b3 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1032,10 +1032,17 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, /* We can safely add the child now */ bdrv_ref(child_bs); - child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format, errp); + if (child == NULL) { + s->next_child_index--; + bdrv_unref(child_bs); + goto out; + } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; +out: bdrv_drained_end(bs); } @@ -1126,6 +1133,8 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, + .bdrv_child_perm = bdrv_filter_default_perms, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/block/raw-format.c b/block/raw-format.c index 8404a82e0c..86fbc657eb 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -341,7 +341,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset) s->size = offset; offset += s->offset; - return bdrv_truncate(bs->file->bs, offset); + return bdrv_truncate(bs->file, offset); } static int raw_media_changed(BlockDriverState *bs) @@ -384,6 +384,12 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, BDRVRawState *s = bs->opaque; int ret; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + bs->sg = bs->file->bs->sg; bs->supported_write_flags = BDRV_REQ_FUA & bs->file->bs->supported_write_flags; @@ -461,6 +467,7 @@ BlockDriver bdrv_raw = { .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_create = &raw_create, .bdrv_co_preadv = &raw_co_preadv, .bdrv_co_pwritev = &raw_co_pwritev, diff --git a/block/rbd.c b/block/rbd.c index a57b3e3c5d..ee13f3d9d3 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -18,6 +18,7 @@ #include "block/block_int.h" #include "crypto/secret.h" #include "qemu/cutils.h" +#include "qapi/qmp/qstring.h" #include <rbd/librbd.h> @@ -62,6 +63,13 @@ #define RBD_MAX_SNAP_NAME_SIZE 128 #define RBD_MAX_SNAPS 100 +/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */ +#ifdef LIBRBD_SUPPORTS_IOVEC +#define LIBRBD_USE_IOVEC 1 +#else +#define LIBRBD_USE_IOVEC 0 +#endif + typedef enum { RBD_AIO_READ, RBD_AIO_WRITE, @@ -95,10 +103,10 @@ typedef struct BDRVRBDState { char *snap; } BDRVRBDState; -static int qemu_rbd_next_tok(char *dst, int dst_len, - char *src, char delim, - const char *name, - char **p, 
Error **errp) +static char *qemu_rbd_next_tok(int max_len, + char *src, char delim, + const char *name, + char **p, Error **errp) { int l; char *end; @@ -120,17 +128,15 @@ static int qemu_rbd_next_tok(char *dst, int dst_len, } } l = strlen(src); - if (l >= dst_len) { + if (l >= max_len) { error_setg(errp, "%s too long", name); - return -EINVAL; + return NULL; } else if (l == 0) { error_setg(errp, "%s too short", name); - return -EINVAL; + return NULL; } - pstrcpy(dst, dst_len, src); - - return 0; + return src; } static void qemu_rbd_unescape(char *src) @@ -146,87 +152,134 @@ static void qemu_rbd_unescape(char *src) *p = '\0'; } -static int qemu_rbd_parsename(const char *filename, - char *pool, int pool_len, - char *snap, int snap_len, - char *name, int name_len, - char *conf, int conf_len, - Error **errp) +static void qemu_rbd_parse_filename(const char *filename, QDict *options, + Error **errp) { const char *start; - char *p, *buf; - int ret; + char *p, *buf, *keypairs; + char *found_str; + size_t max_keypair_size; + Error *local_err = NULL; if (!strstart(filename, "rbd:", &start)) { error_setg(errp, "File name must start with 'rbd:'"); - return -EINVAL; + return; } + max_keypair_size = strlen(start) + 1; buf = g_strdup(start); + keypairs = g_malloc0(max_keypair_size); p = buf; - *snap = '\0'; - *conf = '\0'; - ret = qemu_rbd_next_tok(pool, pool_len, p, - '/', "pool name", &p, errp); - if (ret < 0 || !p) { - ret = -EINVAL; + found_str = qemu_rbd_next_tok(RBD_MAX_POOL_NAME_SIZE, p, + '/', "pool name", &p, &local_err); + if (local_err) { goto done; } - qemu_rbd_unescape(pool); + if (!p) { + error_setg(errp, "Pool name is required"); + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put(options, "pool", qstring_from_str(found_str)); if (strchr(p, '@')) { - ret = qemu_rbd_next_tok(name, name_len, p, - '@', "object name", &p, errp); - if (ret < 0) { + found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, + '@', "object name", &p, &local_err); + if (local_err) { goto done; } - ret = qemu_rbd_next_tok(snap, snap_len, p, - ':', "snap name", &p, errp); - qemu_rbd_unescape(snap); + qemu_rbd_unescape(found_str); + qdict_put(options, "image", qstring_from_str(found_str)); + + found_str = qemu_rbd_next_tok(RBD_MAX_SNAP_NAME_SIZE, p, + ':', "snap name", &p, &local_err); + if (local_err) { + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put(options, "snapshot", qstring_from_str(found_str)); } else { - ret = qemu_rbd_next_tok(name, name_len, p, - ':', "object name", &p, errp); + found_str = qemu_rbd_next_tok(RBD_MAX_IMAGE_NAME_SIZE, p, + ':', "object name", &p, &local_err); + if (local_err) { + goto done; + } + qemu_rbd_unescape(found_str); + qdict_put(options, "image", qstring_from_str(found_str)); } - qemu_rbd_unescape(name); - if (ret < 0 || !p) { + if (!p) { goto done; } - ret = qemu_rbd_next_tok(conf, conf_len, p, - '\0', "configuration", &p, errp); - -done: - g_free(buf); - return ret; -} - -static char *qemu_rbd_parse_clientname(const char *conf, char *clientname) -{ - const char *p = conf; + found_str = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '\0', "configuration", &p, &local_err); + if (local_err) { + goto done; + } - while (*p) { - int len; - const char *end = strchr(p, ':'); + p = found_str; - if (end) { - len = end - p; - } else { - len = strlen(p); + /* The following are essentially all key/value pairs, and we treat + * 'id' and 'conf' a bit special. Key/value pairs may be in any order. 
*/ + while (p) { + char *name, *value; + name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '=', "conf option name", &p, &local_err); + if (local_err) { + break; } - if (strncmp(p, "id=", 3) == 0) { - len -= 3; - strncpy(clientname, p + 3, len); - clientname[len] = '\0'; - return clientname; + if (!p) { + error_setg(errp, "conf option %s has no value", name); + break; } - if (end == NULL) { + + qemu_rbd_unescape(name); + + value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, + ':', "conf option value", &p, &local_err); + if (local_err) { break; } - p = end + 1; + qemu_rbd_unescape(value); + + if (!strcmp(name, "conf")) { + qdict_put(options, "conf", qstring_from_str(value)); + } else if (!strcmp(name, "id")) { + qdict_put(options, "user" , qstring_from_str(value)); + } else { + /* FIXME: This is pretty ugly, and not the right way to do this. + * These should be contained in a structure, and then + * passed explicitly as individual key/value pairs to + * rados. Consider this legacy code that needs to be + * updated. */ + char *tmp = g_malloc0(max_keypair_size); + /* only use a delimiter if it is not the first keypair found */ + /* These are sets of unknown key/value pairs we'll pass along + * to ceph */ + if (keypairs[0]) { + snprintf(tmp, max_keypair_size, ":%s=%s", name, value); + pstrcat(keypairs, max_keypair_size, tmp); + } else { + snprintf(keypairs, max_keypair_size, "%s=%s", name, value); + } + g_free(tmp); + } } - return NULL; + + if (keypairs[0]) { + qdict_put(options, "keyvalue-pairs", qstring_from_str(keypairs)); + } + + +done: + if (local_err) { + error_propagate(errp, local_err); + } + g_free(buf); + g_free(keypairs); + return; } @@ -249,26 +302,24 @@ static int qemu_rbd_set_auth(rados_t cluster, const char *secretid, return 0; } - -static int qemu_rbd_set_conf(rados_t cluster, const char *conf, - bool only_read_conf_file, - Error **errp) +static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs, + Error **errp) { char *p, *buf; - char name[RBD_MAX_CONF_NAME_SIZE]; - char value[RBD_MAX_CONF_VAL_SIZE]; + char *name; + char *value; + Error *local_err = NULL; int ret = 0; - buf = g_strdup(conf); + buf = g_strdup(keypairs); p = buf; while (p) { - ret = qemu_rbd_next_tok(name, sizeof(name), p, - '=', "conf option name", &p, errp); - if (ret < 0) { + name = qemu_rbd_next_tok(RBD_MAX_CONF_NAME_SIZE, p, + '=', "conf option name", &p, &local_err); + if (local_err) { break; } - qemu_rbd_unescape(name); if (!p) { error_setg(errp, "conf option %s has no value", name); @@ -276,67 +327,117 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, break; } - ret = qemu_rbd_next_tok(value, sizeof(value), p, - ':', "conf option value", &p, errp); - if (ret < 0) { + value = qemu_rbd_next_tok(RBD_MAX_CONF_VAL_SIZE, p, + ':', "conf option value", &p, &local_err); + if (local_err) { break; } - qemu_rbd_unescape(value); - if (strcmp(name, "conf") == 0) { - /* read the conf file alone, so it doesn't override more - specific settings for a particular device */ - if (only_read_conf_file) { - ret = rados_conf_read_file(cluster, value); - if (ret < 0) { - error_setg_errno(errp, -ret, "error reading conf file %s", - value); - break; - } - } - } else if (strcmp(name, "id") == 0) { - /* ignore, this is parsed by qemu_rbd_parse_clientname() */ - } else if (!only_read_conf_file) { - ret = rados_conf_set(cluster, name, value); - if (ret < 0) { - error_setg_errno(errp, -ret, "invalid conf option %s", name); - ret = -EINVAL; - break; - } + ret = rados_conf_set(cluster, name, value); + if 
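/* Editorial note (not part of the patch): with qemu_rbd_parse_filename()
 * above, a legacy filename such as
 *
 *     rbd:rbd/vm1@snap1:id=admin:conf=/etc/ceph/ceph.conf:rbd_cache=true
 *
 * (image and option values hypothetical) is split into the options QDict
 * roughly as pool=rbd, image=vm1, snapshot=snap1, user=admin and
 * conf=/etc/ceph/ceph.conf, while the unrecognised rbd_cache=true pair is
 * collected into "keyvalue-pairs" and later handed to rados_conf_set()
 * by qemu_rbd_set_keypairs(). */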
(ret < 0) { + error_setg_errno(errp, -ret, "invalid conf option %s", name); + ret = -EINVAL; + break; } } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + } g_free(buf); return ret; } +static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs) +{ + if (LIBRBD_USE_IOVEC) { + RBDAIOCB *acb = rcb->acb; + iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0, + acb->qiov->size - offs); + } else { + memset(rcb->buf + offs, 0, rcb->size - offs); + } +} + +static QemuOptsList runtime_opts = { + .name = "rbd", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "Specification of the rbd image", + }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", + }, + { + .name = "conf", + .type = QEMU_OPT_STRING, + .help = "Rados config file location", + }, + { + .name = "pool", + .type = QEMU_OPT_STRING, + .help = "Rados pool name", + }, + { + .name = "image", + .type = QEMU_OPT_STRING, + .help = "Image name in the pool", + }, + { + .name = "snapshot", + .type = QEMU_OPT_STRING, + .help = "Ceph snapshot name", + }, + { + /* maps to 'id' in rados_create() */ + .name = "user", + .type = QEMU_OPT_STRING, + .help = "Rados id name", + }, + { + .name = "keyvalue-pairs", + .type = QEMU_OPT_STRING, + .help = "Legacy rados key/value option parameters", + }, + { + .name = "host", + .type = QEMU_OPT_STRING, + }, + { + .name = "port", + .type = QEMU_OPT_STRING, + }, + { + .name = "auth", + .type = QEMU_OPT_STRING, + .help = "Supported authentication method, either cephx or none", + }, + { /* end of list */ } + }, +}; + static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) { Error *local_err = NULL; int64_t bytes = 0; int64_t objsize; int obj_order = 0; - char pool[RBD_MAX_POOL_NAME_SIZE]; - char name[RBD_MAX_IMAGE_NAME_SIZE]; - char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; - char conf[RBD_MAX_CONF_SIZE]; - char clientname_buf[RBD_MAX_CONF_SIZE]; - char *clientname; + const char *pool, *name, *conf, *clientname, *keypairs; const char *secretid; rados_t cluster; rados_ioctx_t io_ctx; - int ret; + QDict *options = NULL; + QemuOpts *rbd_opts = NULL; + int ret = 0; secretid = qemu_opt_get(opts, "password-secret"); - if (qemu_rbd_parsename(filename, pool, sizeof(pool), - snap_buf, sizeof(snap_buf), - name, sizeof(name), - conf, sizeof(conf), &local_err) < 0) { - error_propagate(errp, local_err); - return -EINVAL; - } - /* Read out options */ bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); @@ -344,35 +445,55 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) if (objsize) { if ((objsize - 1) & objsize) { /* not a power of 2? 
*/ error_setg(errp, "obj size needs to be power of 2"); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (objsize < 4096) { error_setg(errp, "obj size too small"); - return -EINVAL; + ret = -EINVAL; + goto exit; } obj_order = ctz32(objsize); } - clientname = qemu_rbd_parse_clientname(conf, clientname_buf); + options = qdict_new(); + qemu_rbd_parse_filename(filename, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto exit; + } + + rbd_opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(rbd_opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto exit; + } + + pool = qemu_opt_get(rbd_opts, "pool"); + conf = qemu_opt_get(rbd_opts, "conf"); + clientname = qemu_opt_get(rbd_opts, "user"); + name = qemu_opt_get(rbd_opts, "image"); + keypairs = qemu_opt_get(rbd_opts, "keyvalue-pairs"); + ret = rados_create(&cluster, clientname); if (ret < 0) { error_setg_errno(errp, -ret, "error initializing"); - return ret; + goto exit; } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(cluster, NULL); - } else if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) { - error_propagate(errp, local_err); + /* try default location when conf=NULL, but ignore failure */ + ret = rados_conf_read_file(cluster, conf); + if (conf && ret < 0) { + error_setg_errno(errp, -ret, "error reading conf file %s", conf); ret = -EIO; goto shutdown; } - if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) { - error_propagate(errp, local_err); + ret = qemu_rbd_set_keypairs(cluster, keypairs, errp); + if (ret < 0) { ret = -EIO; goto shutdown; } @@ -403,6 +524,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) shutdown: rados_shutdown(cluster); + +exit: + QDECREF(options); + qemu_opts_del(rbd_opts); return ret; } @@ -426,11 +551,11 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) } } else { if (r < 0) { - memset(rcb->buf, 0, rcb->size); + qemu_rbd_memset(rcb, 0); acb->ret = r; acb->error = 1; } else if (r < rcb->size) { - memset(rcb->buf + r, 0, rcb->size - r); + qemu_rbd_memset(rcb, r); if (!acb->error) { acb->ret = rcb->size; } @@ -441,47 +566,116 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) g_free(rcb); - if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); + if (!LIBRBD_USE_IOVEC) { + if (acb->cmd == RBD_AIO_READ) { + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); + } + qemu_vfree(acb->bounce); } - qemu_vfree(acb->bounce); + acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
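/* Editorial note: in qemu_rbd_create() above, "(objsize - 1) & objsize"
 * is the usual power-of-two test. For example, 4 MiB = 0x400000 passes
 * because 0x3FFFFF & 0x400000 == 0, giving obj_order = ctz32(0x400000)
 * = 22, whereas 3 MiB = 0x300000 fails because
 * 0x2FFFFF & 0x300000 == 0x200000. */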
0 : acb->ret)); qemu_aio_unref(acb); } -/* TODO Convert to fine grained options */ -static QemuOptsList runtime_opts = { - .name = "rbd", - .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), - .desc = { - { - .name = "filename", - .type = QEMU_OPT_STRING, - .help = "Specification of the rbd image", - }, - { - .name = "password-secret", - .type = QEMU_OPT_STRING, - .help = "ID of secret providing the password", - }, - { /* end of list */ } - }, -}; +#define RBD_MON_HOST 0 +#define RBD_AUTH_SUPPORTED 1 + +static char *qemu_rbd_array_opts(QDict *options, const char *prefix, int type, + Error **errp) +{ + int num_entries; + QemuOpts *opts = NULL; + QDict *sub_options; + const char *host; + const char *port; + char *str; + char *rados_str = NULL; + Error *local_err = NULL; + int i; + + assert(type == RBD_MON_HOST || type == RBD_AUTH_SUPPORTED); + + num_entries = qdict_array_entries(options, prefix); + + if (num_entries < 0) { + error_setg(errp, "Parse error on RBD QDict array"); + return NULL; + } + + for (i = 0; i < num_entries; i++) { + char *strbuf = NULL; + const char *value; + char *rados_str_tmp; + + str = g_strdup_printf("%s%d.", prefix, i); + qdict_extract_subqdict(options, &sub_options, str); + g_free(str); + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, sub_options, &local_err); + QDECREF(sub_options); + if (local_err) { + error_propagate(errp, local_err); + g_free(rados_str); + rados_str = NULL; + goto exit; + } + + if (type == RBD_MON_HOST) { + host = qemu_opt_get(opts, "host"); + port = qemu_opt_get(opts, "port"); + + value = host; + if (port) { + /* check for ipv6 */ + if (strchr(host, ':')) { + strbuf = g_strdup_printf("[%s]:%s", host, port); + } else { + strbuf = g_strdup_printf("%s:%s", host, port); + } + value = strbuf; + } else if (strchr(host, ':')) { + strbuf = g_strdup_printf("[%s]", host); + value = strbuf; + } + } else { + value = qemu_opt_get(opts, "auth"); + } + + + /* each iteration in the for loop will build upon the string, and if + * rados_str is NULL then it is our first pass */ + if (rados_str) { + /* separate options with ';', as that is what rados_conf_set() + * requires */ + rados_str_tmp = rados_str; + rados_str = g_strdup_printf("%s;%s", rados_str_tmp, value); + g_free(rados_str_tmp); + } else { + rados_str = g_strdup(value); + } + + g_free(strbuf); + qemu_opts_del(opts); + opts = NULL; + } + +exit: + qemu_opts_del(opts); + return rados_str; +} static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVRBDState *s = bs->opaque; - char pool[RBD_MAX_POOL_NAME_SIZE]; - char snap_buf[RBD_MAX_SNAP_NAME_SIZE]; - char conf[RBD_MAX_CONF_SIZE]; - char clientname_buf[RBD_MAX_CONF_SIZE]; - char *clientname; + const char *pool, *snap, *conf, *clientname, *name, *keypairs; const char *secretid; QemuOpts *opts; Error *local_err = NULL; - const char *filename; + char *mon_host = NULL; + char *auth_supported = NULL; int r; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -492,41 +686,63 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - filename = qemu_opt_get(opts, "filename"); - secretid = qemu_opt_get(opts, "password-secret"); + auth_supported = qemu_rbd_array_opts(options, "auth-supported.", + RBD_AUTH_SUPPORTED, &local_err); + if (local_err) { + error_propagate(errp, local_err); + r = -EINVAL; + goto failed_opts; + } - if (qemu_rbd_parsename(filename, pool, sizeof(pool), - snap_buf, sizeof(snap_buf), - s->name, 
sizeof(s->name), - conf, sizeof(conf), errp) < 0) { + mon_host = qemu_rbd_array_opts(options, "server.", + RBD_MON_HOST, &local_err); + if (local_err) { + error_propagate(errp, local_err); r = -EINVAL; goto failed_opts; } - clientname = qemu_rbd_parse_clientname(conf, clientname_buf); + secretid = qemu_opt_get(opts, "password-secret"); + + pool = qemu_opt_get(opts, "pool"); + conf = qemu_opt_get(opts, "conf"); + snap = qemu_opt_get(opts, "snapshot"); + clientname = qemu_opt_get(opts, "user"); + name = qemu_opt_get(opts, "image"); + keypairs = qemu_opt_get(opts, "keyvalue-pairs"); + r = rados_create(&s->cluster, clientname); if (r < 0) { error_setg_errno(errp, -r, "error initializing"); goto failed_opts; } - s->snap = NULL; - if (snap_buf[0] != '\0') { - s->snap = g_strdup(snap_buf); + s->snap = g_strdup(snap); + if (name) { + pstrcpy(s->name, RBD_MAX_IMAGE_NAME_SIZE, name); + } + + /* try default location when conf=NULL, but ignore failure */ + r = rados_conf_read_file(s->cluster, conf); + if (conf && r < 0) { + error_setg_errno(errp, -r, "error reading conf file %s", conf); + goto failed_shutdown; + } + + r = qemu_rbd_set_keypairs(s->cluster, keypairs, errp); + if (r < 0) { + goto failed_shutdown; } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(s->cluster, NULL); - } else if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, true, errp); + if (mon_host) { + r = rados_conf_set(s->cluster, "mon_host", mon_host); if (r < 0) { goto failed_shutdown; } } - if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, false, errp); + if (auth_supported) { + r = rados_conf_set(s->cluster, "auth_supported", auth_supported); if (r < 0) { goto failed_shutdown; } @@ -580,6 +796,8 @@ failed_shutdown: g_free(s->snap); failed_opts: qemu_opts_del(opts); + g_free(mon_host); + g_free(auth_supported); return r; } @@ -655,7 +873,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, RBDAIOCB *acb; RADOSCB *rcb = NULL; rbd_completion_t c; - char *buf; int r; BDRVRBDState *s = bs->opaque; @@ -664,27 +881,29 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, acb->cmd = cmd; acb->qiov = qiov; assert(!qiov || qiov->size == size); - if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { - acb->bounce = NULL; - } else { - acb->bounce = qemu_try_blockalign(bs, qiov->size); - if (acb->bounce == NULL) { - goto failed; + + rcb = g_new(RADOSCB, 1); + + if (!LIBRBD_USE_IOVEC) { + if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) { + acb->bounce = NULL; + } else { + acb->bounce = qemu_try_blockalign(bs, qiov->size); + if (acb->bounce == NULL) { + goto failed; + } + } + if (cmd == RBD_AIO_WRITE) { + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); } + rcb->buf = acb->bounce; } + acb->ret = 0; acb->error = 0; acb->s = s; - if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); - } - - buf = acb->bounce; - - rcb = g_new(RADOSCB, 1); rcb->acb = acb; - rcb->buf = buf; rcb->s = acb->s; rcb->size = size; r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c); @@ -694,10 +913,18 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, switch (cmd) { case RBD_AIO_WRITE: - r = rbd_aio_write(s->image, off, size, buf, c); +#ifdef LIBRBD_SUPPORTS_IOVEC + r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); +#else + r = rbd_aio_write(s->image, off, size, rcb->buf, c); +#endif break; case RBD_AIO_READ: - r = rbd_aio_read(s->image, off, size, buf, c); +#ifdef 
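/* Editorial note: qemu_rbd_array_opts() above flattens the "server."
 * QDict array into the single string rados expects for mon_host
 * (and does the same for "auth-supported."). For example, with
 * hypothetical addresses server.0.host=192.168.0.1, server.0.port=6789
 * and server.1.host=fe80::1, the result is "192.168.0.1:6789;[fe80::1]":
 * entries joined with ';' and IPv6 hosts wrapped in brackets. */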
LIBRBD_SUPPORTS_IOVEC + r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); +#else + r = rbd_aio_read(s->image, off, size, rcb->buf, c); +#endif break; case RBD_AIO_DISCARD: r = rbd_aio_discard_wrapper(s->image, off, size, c); @@ -712,14 +939,16 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, if (r < 0) { goto failed_completion; } - return &acb->common; failed_completion: rbd_aio_release(c); failed: g_free(rcb); - qemu_vfree(acb->bounce); + if (!LIBRBD_USE_IOVEC) { + qemu_vfree(acb->bounce); + } + qemu_aio_unref(acb); return NULL; } @@ -972,18 +1201,18 @@ static QemuOptsList qemu_rbd_create_opts = { }; static BlockDriver bdrv_rbd = { - .format_name = "rbd", - .instance_size = sizeof(BDRVRBDState), - .bdrv_needs_filename = true, - .bdrv_file_open = qemu_rbd_open, - .bdrv_close = qemu_rbd_close, - .bdrv_create = qemu_rbd_create, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_get_info = qemu_rbd_getinfo, - .create_opts = &qemu_rbd_create_opts, - .bdrv_getlength = qemu_rbd_getlength, - .bdrv_truncate = qemu_rbd_truncate, - .protocol_name = "rbd", + .format_name = "rbd", + .instance_size = sizeof(BDRVRBDState), + .bdrv_parse_filename = qemu_rbd_parse_filename, + .bdrv_file_open = qemu_rbd_open, + .bdrv_close = qemu_rbd_close, + .bdrv_create = qemu_rbd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_get_info = qemu_rbd_getinfo, + .create_opts = &qemu_rbd_create_opts, + .bdrv_getlength = qemu_rbd_getlength, + .bdrv_truncate = qemu_rbd_truncate, + .protocol_name = "rbd", .bdrv_aio_readv = qemu_rbd_aio_readv, .bdrv_aio_writev = qemu_rbd_aio_writev, diff --git a/block/replication.c b/block/replication.c index 729dd12499..22f170fd33 100644 --- a/block/replication.c +++ b/block/replication.c @@ -86,6 +86,12 @@ static int replication_open(BlockDriverState *bs, QDict *options, const char *mode; const char *top_id; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + ret = -EINVAL; opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -638,7 +644,7 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) s->replication_state = BLOCK_REPLICATION_FAILOVER; commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, - replication_done, bs, errp, true); + NULL, replication_done, bs, errp, true); break; default: aio_context_release(aio_context); @@ -654,6 +660,7 @@ BlockDriver bdrv_replication = { .bdrv_open = replication_open, .bdrv_close = replication_close, + .bdrv_child_perm = bdrv_filter_default_perms, .bdrv_getlength = replication_getlength, .bdrv_co_readv = replication_co_readv, diff --git a/block/sheepdog.c b/block/sheepdog.c index 860ba61502..743471043e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1609,7 +1609,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); if (blk == NULL) { ret = -EIO; goto out_with_err_set; diff --git a/block/stream.c b/block/stream.c index 1523ba7dfb..0113710845 100644 --- a/block/stream.c +++ b/block/stream.c @@ -68,6 +68,7 @@ static void stream_complete(BlockJob *job, void *opaque) StreamCompleteData *data = opaque; BlockDriverState *bs = blk_bs(job->blk); BlockDriverState *base = s->base; + Error *local_err = NULL; if 
(!block_job_is_cancelled(&s->common) && data->reached_end && data->ret == 0) { @@ -79,11 +80,19 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(bs, base_id, base_fmt); - bdrv_set_backing_hd(bs, base); + bdrv_set_backing_hd(bs, base, &local_err); + if (local_err) { + error_report_err(local_err); + data->ret = -EPERM; + goto out; + } } +out: /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { + /* Give up write permissions before making it read-only */ + blk_set_perm(job->blk, 0, BLK_PERM_ALL, &error_abort); bdrv_reopen(bs, s->bs_flags, NULL); } @@ -229,25 +238,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *iter; int orig_bs_flags; - s = block_job_create(job_id, &stream_job_driver, bs, speed, - BLOCK_JOB_DEFAULT, NULL, NULL, errp); - if (!s) { - return; - } - /* Make sure that the image is opened in read-write mode */ orig_bs_flags = bdrv_get_flags(bs); if (!(orig_bs_flags & BDRV_O_RDWR)) { if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { - block_job_unref(&s->common); return; } } - /* Block all intermediate nodes between bs and base, because they - * will disappear from the chain after this operation */ + /* Prevent concurrent jobs trying to modify the graph structure here, we + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. */ + s = block_job_create(job_id, &stream_job_driver, bs, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE, + speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp); + if (!s) { + goto fail; + } + + /* Block all intermediate nodes between bs and base, because they will + * disappear from the chain after this operation. The streaming job reads + * every block only once, assuming that it doesn't change, so block writes + * and resizes. 
*/ for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { - block_job_add_bdrv(&s->common, iter); + block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, + &error_abort); } s->base = base; @@ -257,4 +276,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, s->on_error = on_error; trace_stream_start(bs, base, s); block_job_start(&s->common); + return; + +fail: + if (orig_bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } } diff --git a/block/vdi.c b/block/vdi.c index 0aeb940aa8..9b4f70e977 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -363,6 +363,12 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, int ret; Error *local_err = NULL; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + logout("\n"); ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); @@ -757,7 +763,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -885,6 +892,7 @@ static BlockDriver bdrv_vdi = { .bdrv_open = vdi_open, .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = vdi_co_get_block_status, diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 02eb104310..67a91c0de5 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -548,7 +548,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, if (new_file_size % (1024*1024)) { /* round up to nearest 1MB boundary */ new_file_size = ((new_file_size >> 20) + 1) << 20; - bdrv_truncate(bs->file->bs, new_file_size); + bdrv_truncate(bs->file, new_file_size); } } qemu_vfree(desc_entries); diff --git a/block/vhdx.c b/block/vhdx.c index 68db9e074e..052a753159 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -898,6 +898,12 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, uint64_t signature; Error *local_err = NULL; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + s->bat = NULL; s->first_visible_write = true; @@ -1165,7 +1171,7 @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, /* per the spec, the address for a block is in units of 1MB */ *new_offset = ROUND_UP(*new_offset, 1024 * 1024); - return bdrv_truncate(bs->file->bs, *new_offset + s->block_size); + return bdrv_truncate(bs->file, *new_offset + s->block_size); } /* @@ -1853,7 +1859,8 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1977,6 +1984,7 @@ static BlockDriver bdrv_vhdx = { .bdrv_open = vhdx_open, .bdrv_close = vhdx_close, .bdrv_reopen_prepare = vhdx_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_readv = vhdx_co_readv, .bdrv_co_writev = vhdx_co_writev, .bdrv_create = vhdx_create, diff --git a/block/vmdk.c b/block/vmdk.c index 393c84d8b1..a9bd22bf93 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ 
-943,6 +943,12 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, uint32_t magic; Error *local_err = NULL; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + buf = vmdk_read_desc(bs->file, 0, errp); if (!buf) { return -EINVAL; @@ -1697,7 +1703,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2065,7 +2072,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } new_blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (new_blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -2353,6 +2361,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_open = vmdk_open, .bdrv_check = vmdk_check, .bdrv_reopen_prepare = vmdk_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, diff --git a/block/vpc.c b/block/vpc.c index ed6353dbd4..f591d4be38 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -220,6 +220,12 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int disk_type = VHD_DYNAMIC; int ret; + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, + false, errp); + if (!bs->file) { + return -EINVAL; + } + opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { @@ -909,7 +915,8 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } blk = blk_new_open(filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, + &local_err); if (blk == NULL) { error_propagate(errp, local_err); ret = -EIO; @@ -1061,6 +1068,7 @@ static BlockDriver bdrv_vpc = { .bdrv_open = vpc_open, .bdrv_close = vpc_close, .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_child_perm = bdrv_format_default_perms, .bdrv_create = vpc_create, .bdrv_co_preadv = vpc_co_preadv, diff --git a/block/vvfat.c b/block/vvfat.c index c6bf67e8f3..aa61c329e7 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2968,6 +2968,7 @@ static void write_target_close(BlockDriverState *bs) { static BlockDriver vvfat_write_target = { .format_name = "vvfat_write_target", + .instance_size = sizeof(void*), .bdrv_co_pwritev = write_target_commit, .bdrv_close = write_target_close, }; @@ -3036,13 +3037,12 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) unlink(s->qcow_filename); #endif - backing = bdrv_new(); - bdrv_set_backing_hd(s->bs, backing); - bdrv_unref(backing); + backing = bdrv_new_open_driver(&vvfat_write_target, NULL, BDRV_O_ALLOW_RDWR, + &error_abort); + *(void**) backing->opaque = s; - s->bs->backing->bs->drv = &vvfat_write_target; - s->bs->backing->bs->opaque = g_new(void *, 1); - *(void**)s->bs->backing->bs->opaque = s; + bdrv_set_backing_hd(s->bs, backing, &error_abort); + bdrv_unref(backing); return 0; @@ -3052,6 +3052,27 @@ err: return ret; } +static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + BDRVVVFATState *s = bs->opaque; + + 
assert(c == s->qcow || role == &child_backing); + + if (c == s->qcow) { + /* This is a private node, nobody should try to attach to it */ + *nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE; + *nshared = BLK_PERM_WRITE_UNCHANGED; + } else { + /* The backing file is there so 'commit' can use it. vvfat doesn't + * access it in any way. */ + *nperm = 0; + *nshared = BLK_PERM_ALL; + } +} + static void vvfat_close(BlockDriverState *bs) { BDRVVVFATState *s = bs->opaque; @@ -3077,6 +3098,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_file_open = vvfat_open, .bdrv_refresh_limits = vvfat_refresh_limits, .bdrv_close = vvfat_close, + .bdrv_child_perm = vvfat_child_perm, .bdrv_co_preadv = vvfat_co_preadv, .bdrv_co_pwritev = vvfat_co_pwritev, diff --git a/blockdev.c b/blockdev.c index bbf9d4d8f1..8eb4e84fe0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -52,6 +52,7 @@ #include "sysemu/arch_init.h" #include "qemu/cutils.h" #include "qemu/help_option.h" +#include "qemu/throttle-options.h" static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); @@ -557,7 +558,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(); + blk = blk_new(0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -1767,6 +1768,17 @@ static void external_snapshot_prepare(BlkActionState *common, if (!state->new_bs->drv->supports_backing) { error_setg(errp, "The snapshot does not support backing images"); + return; + } + + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. */ + bdrv_ref(state->new_bs); + bdrv_append(state->new_bs, state->old_bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } } @@ -1777,8 +1789,6 @@ static void external_snapshot_commit(BlkActionState *common) bdrv_set_aio_context(state->new_bs, state->aio_context); - /* This removes our old bs and adds the new bs */ - bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ @@ -1793,7 +1803,9 @@ static void external_snapshot_abort(BlkActionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_unref(state->new_bs); + if (state->new_bs->backing) { + bdrv_replace_in_backing_chain(state->new_bs, state->old_bs); + } } } @@ -1804,6 +1816,7 @@ static void external_snapshot_clean(BlkActionState *common) if (state->aio_context) { bdrv_drained_end(state->old_bs); aio_context_release(state->aio_context); + bdrv_unref(state->new_bs); } } @@ -2310,7 +2323,7 @@ static int do_open_tray(const char *blk_name, const char *qdev_id, } if (!locked || force) { - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } if (locked && !force) { @@ -2348,6 +2361,7 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, Error **errp) { BlockBackend *blk; + Error *local_err = NULL; device = has_device ? device : NULL; id = has_id ? 
id : NULL; @@ -2371,7 +2385,11 @@ void qmp_blockdev_close_tray(bool has_device, const char *device, return; } - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } void qmp_x_blockdev_remove_medium(bool has_device, const char *device, @@ -2424,7 +2442,7 @@ void qmp_x_blockdev_remove_medium(bool has_device, const char *device, * called at all); therefore, the medium needs to be ejected here. * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. false). */ - blk_dev_change_media_cb(blk, false); + blk_dev_change_media_cb(blk, false, &error_abort); } out: @@ -2434,7 +2452,9 @@ out: static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { + Error *local_err = NULL; bool has_device; + int ret; /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); @@ -2454,7 +2474,10 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, return; } - blk_insert_bs(blk, bs); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + return; + } if (!blk_dev_has_tray(blk)) { /* For tray-less devices, blockdev-close-tray is a no-op (or may not be @@ -2462,7 +2485,12 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, * slot here. * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load * value passed here (i.e. true). */ - blk_dev_change_media_cb(blk, true); + blk_dev_change_media_cb(blk, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + blk_remove_bs(blk); + return; + } } } @@ -2858,6 +2886,7 @@ void qmp_block_resize(bool has_device, const char *device, int64_t size, Error **errp) { Error *local_err = NULL; + BlockBackend *blk = NULL; BlockDriverState *bs; AioContext *aio_context; int ret; @@ -2888,10 +2917,16 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } + blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto out; + } + /* complete all in-flight operations before resizing the device */ bdrv_drain_all(); - ret = bdrv_truncate(bs, size); + ret = blk_truncate(blk, size); switch (ret) { case 0: break; @@ -2913,6 +2948,7 @@ void qmp_block_resize(bool has_device, const char *device, } out: + blk_unref(blk); aio_context_release(aio_context); } @@ -3008,6 +3044,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_filter_node_name, const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3023,6 +3060,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, if (!has_speed) { speed = 0; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } /* Important Note: * libvirt relies on the DeviceNotFound error class in order to probe for @@ -3097,8 +3137,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, goto out; } commit_active_start(has_job_id ? 
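/* Editorial note: qmp_block_resize() here and qcow2_amend_options()
 * earlier now resize through the permission system in the same way:
 * attach a throwaway BlockBackend that explicitly requests
 * BLK_PERM_RESIZE and truncate through it (sketch, error handling
 * trimmed):
 *
 *     BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL);
 *     if (blk_insert_bs(blk, bs, errp) < 0) {
 *         return;
 *     }
 *     ret = blk_truncate(blk, new_size);
 *     blk_unref(blk);
 */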
job_id : NULL, bs, base_bs, - BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, - &local_err, false); + BLOCK_JOB_DEFAULT, speed, on_error, + filter_node_name, NULL, NULL, &local_err, false); } else { BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { @@ -3106,7 +3146,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, has_backing_file ? backing_file : NULL, - &local_err); + filter_node_name, &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); @@ -3342,6 +3382,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, bool has_on_target_error, BlockdevOnError on_target_error, bool has_unmap, bool unmap, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { @@ -3363,6 +3405,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, if (!has_unmap) { unmap = true; } + if (!has_filter_node_name) { + filter_node_name = NULL; + } if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", @@ -3392,7 +3437,8 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, errp); + on_source_error, on_target_error, unmap, filter_node_name, + errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) @@ -3530,6 +3576,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) arg->has_on_source_error, arg->on_source_error, arg->has_on_target_error, arg->on_target_error, arg->has_unmap, arg->unmap, + false, NULL, &local_err); bdrv_unref(target_bs); error_propagate(errp, local_err); @@ -3548,6 +3595,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, + bool has_filter_node_name, + const char *filter_node_name, Error **errp) { BlockDriverState *bs; @@ -3579,6 +3628,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_on_source_error, on_source_error, has_on_target_error, on_target_error, true, true, + has_filter_node_name, filter_node_name, &local_err); error_propagate(errp, local_err); @@ -4002,83 +4052,11 @@ QemuOptsList qemu_common_drive_opts = { .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, .help = "open drive file as read-only", - },{ - .name = "throttling.iops-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total I/O operations per second", - },{ - .name = "throttling.iops-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read operations per second", - },{ - .name = "throttling.iops-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write operations per second", - },{ - .name = "throttling.bps-total", - .type = QEMU_OPT_NUMBER, - .help = "limit total bytes per second", - },{ - .name = "throttling.bps-read", - .type = QEMU_OPT_NUMBER, - .help = "limit read bytes per second", - },{ - .name = "throttling.bps-write", - .type = QEMU_OPT_NUMBER, - .help = "limit write bytes per second", - },{ - .name = "throttling.iops-total-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations burst", - },{ - .name = "throttling.iops-read-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations read burst", - },{ - .name = 
"throttling.iops-write-max", - .type = QEMU_OPT_NUMBER, - .help = "I/O operations write burst", - },{ - .name = "throttling.bps-total-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes burst", - },{ - .name = "throttling.bps-read-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes read burst", - },{ - .name = "throttling.bps-write-max", - .type = QEMU_OPT_NUMBER, - .help = "total bytes write burst", - },{ - .name = "throttling.iops-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-total-max burst period, in seconds", - },{ - .name = "throttling.iops-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-read-max burst period, in seconds", - },{ - .name = "throttling.iops-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the iops-write-max burst period, in seconds", - },{ - .name = "throttling.bps-total-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-total-max burst period, in seconds", - },{ - .name = "throttling.bps-read-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-read-max burst period, in seconds", - },{ - .name = "throttling.bps-write-max-length", - .type = QEMU_OPT_NUMBER, - .help = "length of the bps-write-max burst period, in seconds", - },{ - .name = "throttling.iops-size", - .type = QEMU_OPT_NUMBER, - .help = "when limiting by iops max size of an I/O in bytes", - },{ + }, + + THROTTLE_OPTS, + + { .name = "throttling.group", .type = QEMU_OPT_STRING, .help = "name of the block throttling group", diff --git a/blockjob.c b/blockjob.c index abee11bb08..69126af97f 100644 --- a/blockjob.c +++ b/blockjob.c @@ -55,6 +55,19 @@ struct BlockJobTxn { static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs); +static char *child_job_get_parent_desc(BdrvChild *c) +{ + BlockJob *job = c->opaque; + return g_strdup_printf("%s job '%s'", + BlockJobType_lookup[job->driver->job_type], + job->id); +} + +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .stay_at_node = true, +}; + BlockJob *block_job_next(BlockJob *job) { if (!job) { @@ -115,19 +128,44 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +void block_job_remove_all_bdrv(BlockJob *job) +{ + GSList *l; + for (l = job->nodes; l; l = l->next) { + BdrvChild *c = l->data; + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); + } + g_slist_free(job->nodes); + job->nodes = NULL; +} + +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) { - job->nodes = g_slist_prepend(job->nodes, bs); + BdrvChild *c; + + c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, + job, errp); + if (c == NULL) { + return -EPERM; + } + + job->nodes = g_slist_prepend(job->nodes, c); bdrv_ref(bs); bdrv_op_block_all(bs, job->blocker); + + return 0; } void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; BlockJob *job; + int ret; if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); @@ -159,13 +197,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } } - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(perm, 
shared_perm); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + blk_unref(blk); + return NULL; + } job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - block_job_add_bdrv(job, bs); + block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -228,15 +270,9 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { - GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - for (l = job->nodes; l; l = l->next) { - bs = l->data; - bdrv_op_unblock_all(bs, job->blocker); - bdrv_unref(bs); - } - g_slist_free(job->nodes); + block_job_remove_all_bdrv(job); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); @@ -3378,7 +3378,7 @@ fi fdt_required=no for target in $target_list; do case $target in - aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu) + aarch64*-softmmu|arm*-softmmu|ppc*-softmmu|microblaze*-softmmu|mips64el-softmmu) fdt_required=yes ;; esac @@ -3396,11 +3396,11 @@ fi if test "$fdt" != "no" ; then fdt_libs="-lfdt" # explicitly check for libfdt_env.h as it is missing in some stable installs - # and test for required functions to make sure we are on a version >= 1.4.0 + # and test for required functions to make sure we are on a version >= 1.4.2 cat > $TMPC << EOF #include <libfdt.h> #include <libfdt_env.h> -int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; } +int main(void) { fdt_first_subnode(0, 0); return 0; } EOF if compile_prog "" "$fdt_libs" ; then # system DTC is good - use it @@ -3418,7 +3418,7 @@ EOF fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs" elif test "$fdt" = "yes" ; then # have neither and want - prompt for system/submodule install - error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \ + error_exit "DTC (libfdt) version >= 1.4.2 not present. 
Your options:" \ " (1) Preferred: Install the DTC (libfdt) devel package" \ " (2) Fetch the DTC submodule, using:" \ " git submodule update --init dtc" @@ -5879,6 +5879,7 @@ mkdir -p $target_dir echo "# Automatically generated by configure - do not modify" > $config_target_mak bflt="no" +mttcg="no" interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g") gdb_xml_files="" @@ -5893,15 +5894,18 @@ case "$target_name" in TARGET_BASE_ARCH=i386 ;; alpha) + mttcg="yes" ;; arm|armeb) TARGET_ARCH=arm bflt="yes" + mttcg="yes" gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml" ;; aarch64) TARGET_BASE_ARCH=arm bflt="yes" + mttcg="yes" gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml" ;; cris) @@ -6066,6 +6070,9 @@ if test "$target_bigendian" = "yes" ; then fi if test "$target_softmmu" = "yes" ; then echo "CONFIG_SOFTMMU=y" >> $config_target_mak + if test "$mttcg" = "yes" ; then + echo "TARGET_SUPPORTS_MTTCG=y" >> $config_target_mak + fi fi if test "$target_user_only" = "yes" ; then echo "CONFIG_USER_ONLY=y" >> $config_target_mak diff --git a/cpu-exec-common.c b/cpu-exec-common.c index 767d9c6f0c..0504a9457b 100644 --- a/cpu-exec-common.c +++ b/cpu-exec-common.c @@ -23,9 +23,6 @@ #include "exec/exec-all.h" #include "exec/memory-internal.h" -bool exit_request; -CPUState *tcg_current_cpu; - /* exit the current TB, but without causing any exception to be raised */ void cpu_loop_exit_noexc(CPUState *cpu) { diff --git a/cpu-exec.c b/cpu-exec.c index 7db959c821..d04dd91ebd 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -29,6 +29,7 @@ #include "qemu/rcu.h" #include "exec/tb-hash.h" #include "exec/log.h" +#include "qemu/main-loop.h" #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif @@ -221,20 +222,43 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, static void cpu_exec_step(CPUState *cpu) { + CPUClass *cc = CPU_GET_CLASS(cpu); CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb = tb_gen_code(cpu, pc, cs_base, flags, - 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); - tb->orig_tb = NULL; - /* execute the generated code */ - trace_exec_tb_nocache(tb, pc); - cpu_tb_exec(cpu, tb); - tb_phys_invalidate(tb, -1); - tb_free(tb); + if (sigsetjmp(cpu->jmp_env, 0) == 0) { + mmap_lock(); + tb_lock(); + tb = tb_gen_code(cpu, pc, cs_base, flags, + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); + tb->orig_tb = NULL; + tb_unlock(); + mmap_unlock(); + + cc->cpu_exec_enter(cpu); + /* execute the generated code */ + trace_exec_tb_nocache(tb, pc); + cpu_tb_exec(cpu, tb); + cc->cpu_exec_exit(cpu); + + tb_lock(); + tb_phys_invalidate(tb, -1); + tb_free(tb); + tb_unlock(); + } else { + /* We may have exited due to another problem here, so we need + * to reset any tb_locks we may have taken but didn't release. + * The mmap_lock is dropped by tb_gen_code if it runs out of + * memory. 
+ */ +#ifndef CONFIG_SOFTMMU + tcg_debug_assert(!have_mmap_lock()); +#endif + tb_lock_reset(); + } } void cpu_exec_step_atomic(CPUState *cpu) @@ -378,12 +402,13 @@ static inline bool cpu_handle_halt(CPUState *cpu) if ((cpu->interrupt_request & CPU_INTERRUPT_POLL) && replay_interrupt()) { X86CPU *x86_cpu = X86_CPU(cpu); + qemu_mutex_lock_iothread(); apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); + qemu_mutex_unlock_iothread(); } #endif if (!cpu_has_work(cpu)) { - current_cpu = NULL; return true; } @@ -433,7 +458,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) #else if (replay_exception()) { CPUClass *cc = CPU_GET_CLASS(cpu); + qemu_mutex_lock_iothread(); cc->do_interrupt(cpu); + qemu_mutex_unlock_iothread(); cpu->exception_index = -1; } else if (!replay_has_interrupt()) { /* give a chance to iothread in replay mode */ @@ -459,9 +486,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { CPUClass *cc = CPU_GET_CLASS(cpu); - int interrupt_request = cpu->interrupt_request; - if (unlikely(interrupt_request)) { + if (unlikely(atomic_read(&cpu->interrupt_request))) { + int interrupt_request; + qemu_mutex_lock_iothread(); + interrupt_request = cpu->interrupt_request; if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { /* Mask out external interrupts for this step. */ interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; @@ -469,6 +498,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, if (interrupt_request & CPU_INTERRUPT_DEBUG) { cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG; cpu->exception_index = EXCP_DEBUG; + qemu_mutex_unlock_iothread(); return true; } if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) { @@ -478,6 +508,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu->interrupt_request &= ~CPU_INTERRUPT_HALT; cpu->halted = 1; cpu->exception_index = EXCP_HLT; + qemu_mutex_unlock_iothread(); return true; } #if defined(TARGET_I386) @@ -488,12 +519,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); do_cpu_init(x86_cpu); cpu->exception_index = EXCP_HALTED; + qemu_mutex_unlock_iothread(); return true; } #else else if (interrupt_request & CPU_INTERRUPT_RESET) { replay_interrupt(); cpu_reset(cpu); + qemu_mutex_unlock_iothread(); return true; } #endif @@ -516,6 +549,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, the program flow was changed */ *last_tb = NULL; } + + /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */ + qemu_mutex_unlock_iothread(); } /* Finally, check if we need to exit to the main loop. */ @@ -548,13 +584,11 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, insns_left = atomic_read(&cpu->icount_decr.u32); atomic_set(&cpu->icount_decr.u16.high, 0); if (insns_left < 0) { - /* Something asked us to stop executing - * chained TBs; just continue round the main - * loop. Whatever requested the exit will also - * have set something else (eg exit_request or - * interrupt_request) which we will handle - * next time around the loop. But we need to - * ensure the zeroing of tcg_exit_req (see cpu_tb_exec) + /* Something asked us to stop executing chained TBs; just + * continue round the main loop. Whatever requested the exit + * will also have set something else (eg exit_request or + * interrupt_request) which we will handle next time around + * the loop. 
But we need to ensure the zeroing of icount_decr * comes before the next read of cpu->exit_request * or cpu->interrupt_request. */ @@ -597,13 +631,8 @@ int cpu_exec(CPUState *cpu) return EXCP_HALTED; } - atomic_mb_set(&tcg_current_cpu, cpu); rcu_read_lock(); - if (unlikely(atomic_mb_read(&exit_request))) { - cpu->exit_request = 1; - } - cc->cpu_exec_enter(cpu); /* Calculate difference between guest clock and host clock. @@ -629,6 +658,9 @@ int cpu_exec(CPUState *cpu) #endif /* buggy compiler */ cpu->can_do_io = 1; tb_lock_reset(); + if (qemu_mutex_iothread_locked()) { + qemu_mutex_unlock_iothread(); + } } /* if an exception is pending, we execute it here */ @@ -648,10 +680,5 @@ int cpu_exec(CPUState *cpu) cc->cpu_exec_exit(cpu); rcu_read_unlock(); - /* fail safe : never use current_cpu outside cpu_exec() */ - current_cpu = NULL; - - /* Does not need atomic_mb_set because a spurious wakeup is okay. */ - atomic_set(&tcg_current_cpu, NULL); return ret; } @@ -25,6 +25,7 @@ /* Needed early for CONFIG_BSD etc. */ #include "qemu/osdep.h" #include "qemu-common.h" +#include "qemu/config-file.h" #include "cpu.h" #include "monitor/monitor.h" #include "qapi/qmp/qerror.h" @@ -45,6 +46,7 @@ #include "qemu/main-loop.h" #include "qemu/bitmap.h" #include "qemu/seqlock.h" +#include "tcg.h" #include "qapi-event.h" #include "hw/nmi.h" #include "sysemu/replay.h" @@ -150,6 +152,77 @@ typedef struct TimersState { } TimersState; static TimersState timers_state; +bool mttcg_enabled; + +/* + * We default to false if we know other options have been enabled + * which are currently incompatible with MTTCG. Otherwise when each + * guest (target) has been updated to support: + * - atomic instructions + * - memory ordering primitives (barriers) + * they can set the appropriate CONFIG flags in ${target}-softmmu.mak + * + * Once a guest architecture has been converted to the new primitives + * there are two remaining limitations to check. + * + * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host) + * - The host must have a stronger memory order than the guest + * + * It may be possible in future to support strong guests on weak hosts + * but that will require tagging all load/stores in a guest with their + * implicit memory order requirements which would likely slow things + * down a lot. 
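To make the subset relation described above concrete, here is a minimal, self-contained sketch of the compatibility test. The MO_* names and the example values are invented for illustration (the real constants live in the TCG headers); the only point is the (guest & ~host) == 0 check that check_tcg_memory_orders_compatible() performs.

    /* Illustrative only: flag names and values are assumptions for this
     * sketch, not the real TCG definitions. */
    #include <stdbool.h>
    #include <stdio.h>

    #define MO_LD_LD (1 << 0)   /* loads may not pass earlier loads   */
    #define MO_LD_ST (1 << 1)   /* stores may not pass earlier loads  */
    #define MO_ST_LD (1 << 2)   /* loads may not pass earlier stores  */
    #define MO_ST_ST (1 << 3)   /* stores may not pass earlier stores */

    /* A guest/host pair is compatible when every ordering guarantee the
     * guest relies on is also provided by the host. */
    static bool mo_compatible(unsigned guest_mo, unsigned host_mo)
    {
        return (guest_mo & ~host_mo) == 0;
    }

    int main(void)
    {
        unsigned tso_guest = MO_LD_LD | MO_LD_ST | MO_ST_ST; /* x86-like */
        unsigned weak_host = 0;                              /* ARM-like  */
        unsigned tso_host  = MO_LD_LD | MO_LD_ST | MO_ST_ST;

        printf("TSO guest on weak host:  %s\n",
               mo_compatible(tso_guest, weak_host) ? "ok" : "needs barriers");
        printf("weak guest on TSO host:  %s\n",
               mo_compatible(0, tso_host) ? "ok" : "needs barriers");
        return 0;
    }

Running this prints "needs barriers" for the first pairing and "ok" for the second, which is exactly why a strongly ordered guest cannot yet default to MTTCG on a weakly ordered host.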
+ */ + +static bool check_tcg_memory_orders_compatible(void) +{ +#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO) + return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0; +#else + return false; +#endif +} + +static bool default_mttcg_enabled(void) +{ + QemuOpts *icount_opts = qemu_find_opts_singleton("icount"); + const char *rr = qemu_opt_get(icount_opts, "rr"); + + if (rr || TCG_OVERSIZED_GUEST) { + return false; + } else { +#ifdef TARGET_SUPPORTS_MTTCG + return check_tcg_memory_orders_compatible(); +#else + return false; +#endif + } +} + +void qemu_tcg_configure(QemuOpts *opts, Error **errp) +{ + const char *t = qemu_opt_get(opts, "thread"); + if (t) { + if (strcmp(t, "multi") == 0) { + if (TCG_OVERSIZED_GUEST) { + error_setg(errp, "No MTTCG when guest word size > hosts"); + } else { + if (!check_tcg_memory_orders_compatible()) { + error_report("Guest expects a stronger memory ordering " + "than the host provides"); + error_printf("This may cause strange/hard to debug errors"); + } + mttcg_enabled = true; + } + } else if (strcmp(t, "single") == 0) { + mttcg_enabled = false; + } else { + error_setg(errp, "Invalid 'thread' setting %s", t); + } + } else { + mttcg_enabled = default_mttcg_enabled(); + } +} int64_t cpu_get_icount_raw(void) { @@ -695,6 +768,63 @@ void configure_icount(QemuOpts *opts, Error **errp) } /***********************************************************/ +/* TCG vCPU kick timer + * + * The kick timer is responsible for moving single threaded vCPU + * emulation on to the next vCPU. If more than one vCPU is running a + * timer event with force a cpu->exit so the next vCPU can get + * scheduled. + * + * The timer is removed if all vCPUs are idle and restarted again once + * idleness is complete. + */ + +static QEMUTimer *tcg_kick_vcpu_timer; +static CPUState *tcg_current_rr_cpu; + +#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) + +static inline int64_t qemu_tcg_next_kick(void) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; +} + +/* Kick the currently round-robin scheduled vCPU */ +static void qemu_cpu_kick_rr_cpu(void) +{ + CPUState *cpu; + do { + cpu = atomic_mb_read(&tcg_current_rr_cpu); + if (cpu) { + cpu_exit(cpu); + } + } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); +} + +static void kick_tcg_thread(void *opaque) +{ + timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); + qemu_cpu_kick_rr_cpu(); +} + +static void start_tcg_kick_timer(void) +{ + if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { + tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + kick_tcg_thread, NULL); + timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); + } +} + +static void stop_tcg_kick_timer(void) +{ + if (tcg_kick_vcpu_timer) { + timer_del(tcg_kick_vcpu_timer); + tcg_kick_vcpu_timer = NULL; + } +} + +/***********************************************************/ void hw_error(const char *fmt, ...) 
{ va_list ap; @@ -896,8 +1026,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) #endif /* _WIN32 */ static QemuMutex qemu_global_mutex; -static QemuCond qemu_io_proceeded_cond; -static unsigned iothread_requesting_mutex; static QemuThread io_thread; @@ -911,7 +1039,6 @@ void qemu_init_cpu_loop(void) qemu_init_sigbus(); qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); - qemu_cond_init(&qemu_io_proceeded_cond); qemu_mutex_init(&qemu_global_mutex); qemu_thread_get_self(&io_thread); @@ -936,28 +1063,34 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) static void qemu_wait_io_event_common(CPUState *cpu) { + atomic_mb_set(&cpu->thread_kicked, false); if (cpu->stop) { cpu->stop = false; cpu->stopped = true; qemu_cond_broadcast(&qemu_pause_cond); } process_queued_cpu_work(cpu); - cpu->thread_kicked = false; +} + +static bool qemu_tcg_should_sleep(CPUState *cpu) +{ + if (mttcg_enabled) { + return cpu_thread_is_idle(cpu); + } else { + return all_cpu_threads_idle(); + } } static void qemu_tcg_wait_io_event(CPUState *cpu) { - while (all_cpu_threads_idle()) { + while (qemu_tcg_should_sleep(cpu)) { + stop_tcg_kick_timer(); qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - while (iothread_requesting_mutex) { - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); - } + start_tcg_kick_timer(); - CPU_FOREACH(cpu) { - qemu_wait_io_event_common(cpu); - } + qemu_wait_io_event_common(cpu); } static void qemu_kvm_wait_io_event(CPUState *cpu) @@ -1028,6 +1161,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->can_do_io = 1; + current_cpu = cpu; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); @@ -1036,9 +1170,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) cpu->created = true; qemu_cond_signal(&qemu_cpu_cond); - current_cpu = cpu; while (1) { - current_cpu = NULL; qemu_mutex_unlock_iothread(); do { int sig; @@ -1049,7 +1181,6 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) exit(1); } qemu_mutex_lock_iothread(); - current_cpu = cpu; qemu_wait_io_event_common(cpu); } @@ -1115,9 +1246,11 @@ static int tcg_cpu_exec(CPUState *cpu) cpu->icount_decr.u16.low = decr; cpu->icount_extra = count; } + qemu_mutex_unlock_iothread(); cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); + qemu_mutex_lock_iothread(); #ifdef CONFIG_PROFILER tcg_time += profile_getclock() - ti; #endif @@ -1150,7 +1283,16 @@ static void deal_with_unplugged_cpus(void) } } -static void *qemu_tcg_cpu_thread_fn(void *arg) +/* Single-threaded TCG + * + * In the single-threaded case each vCPU is simulated in turn. If + * there is more than a single vCPU we create a simple timer to kick + * the vCPU and ensure we don't get stuck in a tight loop in one vCPU. + * This is done explicitly rather than relying on side-effects + * elsewhere. + */ + +static void *qemu_tcg_rr_cpu_thread_fn(void *arg) { CPUState *cpu = arg; @@ -1172,15 +1314,18 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* process any pending work */ CPU_FOREACH(cpu) { + current_cpu = cpu; qemu_wait_io_event_common(cpu); } } - /* process any pending work */ - atomic_mb_set(&exit_request, 1); + start_tcg_kick_timer(); cpu = first_cpu; + /* process any pending work */ + cpu->exit_request = 1; + while (1) { /* Account partial waits to QEMU_CLOCK_VIRTUAL. 
*/ qemu_account_warp_timer(); @@ -1189,7 +1334,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) cpu = first_cpu; } - for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) { + while (cpu && !cpu->queued_work_first && !cpu->exit_request) { + + atomic_mb_set(&tcg_current_rr_cpu, cpu); + current_cpu = cpu; qemu_clock_enable(QEMU_CLOCK_VIRTUAL, (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); @@ -1200,22 +1348,32 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; + } else if (r == EXCP_ATOMIC) { + qemu_mutex_unlock_iothread(); + cpu_exec_step_atomic(cpu); + qemu_mutex_lock_iothread(); + break; } - } else if (cpu->stop || cpu->stopped) { + } else if (cpu->stop) { if (cpu->unplug) { cpu = CPU_NEXT(cpu); } break; } - } /* for cpu.. */ + cpu = CPU_NEXT(cpu); + } /* while (cpu && !cpu->exit_request).. */ + + /* Does not need atomic_mb_set because a spurious wakeup is okay. */ + atomic_set(&tcg_current_rr_cpu, NULL); - /* Pairs with smp_wmb in qemu_cpu_kick. */ - atomic_mb_set(&exit_request, 0); + if (cpu && cpu->exit_request) { + atomic_mb_set(&cpu->exit_request, 0); + } handle_icount_deadline(); - qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus)); + qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus)); deal_with_unplugged_cpus(); } @@ -1262,6 +1420,68 @@ static void CALLBACK dummy_apc_func(ULONG_PTR unused) } #endif +/* Multi-threaded TCG + * + * In the multi-threaded case each vCPU has its own thread. The TLS + * variable current_cpu can be used deep in the code to find the + * current CPUState for a given thread. + */ + +static void *qemu_tcg_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + + rcu_register_thread(); + + qemu_mutex_lock_iothread(); + qemu_thread_get_self(cpu->thread); + + cpu->thread_id = qemu_get_thread_id(); + cpu->created = true; + cpu->can_do_io = 1; + current_cpu = cpu; + qemu_cond_signal(&qemu_cpu_cond); + + /* process any pending work */ + cpu->exit_request = 1; + + while (1) { + if (cpu_can_run(cpu)) { + int r; + r = tcg_cpu_exec(cpu); + switch (r) { + case EXCP_DEBUG: + cpu_handle_guest_debug(cpu); + break; + case EXCP_HALTED: + /* during start-up the vCPU is reset and the thread is + * kicked several times. If we don't ensure we go back + * to sleep in the halted state we won't cleanly + * start-up when the vCPU is enabled. + * + * cpu->halted should ensure we sleep in wait_io_event + */ + g_assert(cpu->halted); + break; + case EXCP_ATOMIC: + qemu_mutex_unlock_iothread(); + cpu_exec_step_atomic(cpu); + qemu_mutex_lock_iothread(); + default: + /* Ignore everything else? */ + break; + } + } + + handle_icount_deadline(); + + atomic_mb_set(&cpu->exit_request, 0); + qemu_tcg_wait_io_event(cpu); + } + + return NULL; +} + static void qemu_cpu_kick_thread(CPUState *cpu) { #ifndef _WIN32 @@ -1287,24 +1507,13 @@ static void qemu_cpu_kick_thread(CPUState *cpu) #endif } -static void qemu_cpu_kick_no_halt(void) -{ - CPUState *cpu; - /* Ensure whatever caused the exit has reached the CPU threads before - * writing exit_request. 
- */ - atomic_mb_set(&exit_request, 1); - cpu = atomic_mb_read(&tcg_current_cpu); - if (cpu) { - cpu_exit(cpu); - } -} - void qemu_cpu_kick(CPUState *cpu) { qemu_cond_broadcast(cpu->halt_cond); if (tcg_enabled()) { - qemu_cpu_kick_no_halt(); + cpu_exit(cpu); + /* NOP unless doing single-thread RR */ + qemu_cpu_kick_rr_cpu(); } else { if (hax_enabled()) { /* @@ -1342,27 +1551,14 @@ bool qemu_mutex_iothread_locked(void) void qemu_mutex_lock_iothread(void) { - atomic_inc(&iothread_requesting_mutex); - /* In the simple case there is no need to bump the VCPU thread out of - * TCG code execution. - */ - if (!tcg_enabled() || qemu_in_vcpu_thread() || - !first_cpu || !first_cpu->created) { - qemu_mutex_lock(&qemu_global_mutex); - atomic_dec(&iothread_requesting_mutex); - } else { - if (qemu_mutex_trylock(&qemu_global_mutex)) { - qemu_cpu_kick_no_halt(); - qemu_mutex_lock(&qemu_global_mutex); - } - atomic_dec(&iothread_requesting_mutex); - qemu_cond_broadcast(&qemu_io_proceeded_cond); - } + g_assert(!qemu_mutex_iothread_locked()); + qemu_mutex_lock(&qemu_global_mutex); iothread_locked = true; } void qemu_mutex_unlock_iothread(void) { + g_assert(qemu_mutex_iothread_locked()); iothread_locked = false; qemu_mutex_unlock(&qemu_global_mutex); } @@ -1392,13 +1588,6 @@ void pause_all_vcpus(void) if (qemu_in_vcpu_thread()) { cpu_stop_current(); - if (!kvm_enabled()) { - CPU_FOREACH(cpu) { - cpu->stop = false; - cpu->stopped = true; - } - return; - } } while (!all_vcpus_paused()) { @@ -1447,29 +1636,43 @@ void cpu_remove_sync(CPUState *cpu) static void qemu_tcg_init_vcpu(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; - static QemuCond *tcg_halt_cond; - static QemuThread *tcg_cpu_thread; + static QemuCond *single_tcg_halt_cond; + static QemuThread *single_tcg_cpu_thread; - /* share a single thread for all cpus with TCG */ - if (!tcg_cpu_thread) { + if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) { cpu->thread = g_malloc0(sizeof(QemuThread)); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); qemu_cond_init(cpu->halt_cond); - tcg_halt_cond = cpu->halt_cond; - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", + + if (qemu_tcg_mttcg_enabled()) { + /* create a thread per vCPU with TCG (MTTCG) */ + parallel_cpus = true; + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); + + qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); + + } else { + /* share a single thread for all cpus with TCG */ + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG"); + qemu_thread_create(cpu->thread, thread_name, + qemu_tcg_rr_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); + + single_tcg_halt_cond = cpu->halt_cond; + single_tcg_cpu_thread = cpu->thread; + } #ifdef _WIN32 cpu->hThread = qemu_thread_get_handle(cpu->thread); #endif while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } - tcg_cpu_thread = cpu->thread; } else { - cpu->thread = tcg_cpu_thread; - cpu->halt_cond = tcg_halt_cond; + /* For non-MTTCG cases we share the thread */ + cpu->thread = single_tcg_cpu_thread; + cpu->halt_cond = single_tcg_halt_cond; } } @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "cpu.h" #include "exec/exec-all.h" #include "exec/memory.h" @@ -57,6 +58,40 @@ } \ } while (0) +#define assert_cpu_is_self(this_cpu) do { \ + if (DEBUG_TLB_GATE) { \ + g_assert(!cpu->created || qemu_cpu_is_self(cpu)); \ 
+ } \ + } while (0) + +/* run_on_cpu_data.target_ptr should always be big enough for a + * target_ulong even on 32 bit builds */ +QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data)); + +/* We currently can't handle more than 16 bits in the MMUIDX bitmask. + */ +QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16); +#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1) + +/* flush_all_helper: run fn across all cpus + * + * If the wait flag is set then the src cpu's helper will be queued as + * "safe" work and the loop exited creating a synchronisation point + * where all queued work will be finished before execution starts + * again. + */ +static void flush_all_helper(CPUState *src, run_on_cpu_func fn, + run_on_cpu_data d) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu != src) { + async_run_on_cpu(cpu, fn, d); + } + } +} + /* statistics */ int tlb_flush_count; @@ -65,10 +100,22 @@ int tlb_flush_count; * flushing more entries than required is only an efficiency issue, * not a correctness issue. */ -void tlb_flush(CPUState *cpu) +static void tlb_flush_nocheck(CPUState *cpu) { CPUArchState *env = cpu->env_ptr; + /* The QOM tests will trigger tlb_flushes without setting up TCG + * so we bug out here in that case. + */ + if (!tcg_enabled()) { + return; + } + + assert_cpu_is_self(cpu); + tlb_debug("(count: %d)\n", tlb_flush_count++); + + tb_lock(); + memset(env->tlb_table, -1, sizeof(env->tlb_table)); memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); @@ -76,39 +123,117 @@ void tlb_flush(CPUState *cpu) env->vtlb_index = 0; env->tlb_flush_addr = -1; env->tlb_flush_mask = 0; - tlb_flush_count++; + + tb_unlock(); + + atomic_mb_set(&cpu->pending_tlb_flush, 0); } -static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp) +static void tlb_flush_global_async_work(CPUState *cpu, run_on_cpu_data data) +{ + tlb_flush_nocheck(cpu); +} + +void tlb_flush(CPUState *cpu) +{ + if (cpu->created && !qemu_cpu_is_self(cpu)) { + if (atomic_mb_read(&cpu->pending_tlb_flush) != ALL_MMUIDX_BITS) { + atomic_mb_set(&cpu->pending_tlb_flush, ALL_MMUIDX_BITS); + async_run_on_cpu(cpu, tlb_flush_global_async_work, + RUN_ON_CPU_NULL); + } + } else { + tlb_flush_nocheck(cpu); + } +} + +void tlb_flush_all_cpus(CPUState *src_cpu) +{ + const run_on_cpu_func fn = tlb_flush_global_async_work; + flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL); + fn(src_cpu, RUN_ON_CPU_NULL); +} + +void tlb_flush_all_cpus_synced(CPUState *src_cpu) +{ + const run_on_cpu_func fn = tlb_flush_global_async_work; + flush_all_helper(src_cpu, fn, RUN_ON_CPU_NULL); + async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_NULL); +} + +static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; + unsigned long mmu_idx_bitmask = data.host_int; + int mmu_idx; - tlb_debug("start\n"); + assert_cpu_is_self(cpu); - for (;;) { - int mmu_idx = va_arg(argp, int); + tb_lock(); - if (mmu_idx < 0) { - break; - } + tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask); - tlb_debug("%d\n", mmu_idx); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0])); - memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); + if (test_bit(mmu_idx, &mmu_idx_bitmask)) { + tlb_debug("%d\n", mmu_idx); + + memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0])); + memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0])); + } } memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); 
+ + tlb_debug("done\n"); + + tb_unlock(); } -void tlb_flush_by_mmuidx(CPUState *cpu, ...) +void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) { - va_list argp; - va_start(argp, cpu); - v_tlb_flush_by_mmuidx(cpu, argp); - va_end(argp); + tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap); + + if (!qemu_cpu_is_self(cpu)) { + uint16_t pending_flushes = idxmap; + pending_flushes &= ~atomic_mb_read(&cpu->pending_tlb_flush); + + if (pending_flushes) { + tlb_debug("reduced mmu_idx: 0x%" PRIx16 "\n", pending_flushes); + + atomic_or(&cpu->pending_tlb_flush, pending_flushes); + async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work, + RUN_ON_CPU_HOST_INT(pending_flushes)); + } + } else { + tlb_flush_by_mmuidx_async_work(cpu, + RUN_ON_CPU_HOST_INT(idxmap)); + } +} + +void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; + + tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); + fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap)); +} + +void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work; + + tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap); + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); + async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap)); } + + static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) { if (addr == (tlb_entry->addr_read & @@ -121,12 +246,15 @@ static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) } } -void tlb_flush_page(CPUState *cpu, target_ulong addr) +static void tlb_flush_page_async_work(CPUState *cpu, run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; + target_ulong addr = (target_ulong) data.target_ptr; int i; int mmu_idx; + assert_cpu_is_self(cpu); + tlb_debug("page :" TARGET_FMT_lx "\n", addr); /* Check if we need to flush due to large pages. */ @@ -156,15 +284,62 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) tb_flush_jmp_cache(cpu, addr); } -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) 
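The replacement interface takes a bitmask of MMU indexes rather than a -1 terminated varargs list, and gains broadcast variants. A caller-side sketch follows, with MMU_IDX_USER and MMU_IDX_KERNEL standing in for whatever index names a target actually defines:

    /* Hypothetical target code; the MMU_IDX_* names are placeholders. */
    #include "qemu/osdep.h"
    #include "cpu.h"
    #include "exec/exec-all.h"

    #define MMU_IDX_USER   0
    #define MMU_IDX_KERNEL 1

    /* Flush user and kernel mappings on this vCPU only. If the caller is
     * not the owning vCPU thread, the flush is queued on that vCPU via
     * async_run_on_cpu() instead of touching its TLB directly. */
    static void flush_user_and_kernel_local(CPUState *cs)
    {
        uint16_t idxmap = (1 << MMU_IDX_USER) | (1 << MMU_IDX_KERNEL);

        tlb_flush_by_mmuidx(cs, idxmap);
    }

    /* Broadcast variant: the other vCPUs get the flush as queued work and
     * the source vCPU's own flush is queued as "safe" work, so it takes
     * effect at the next synchronisation point. */
    static void flush_user_and_kernel_everywhere(CPUState *cs)
    {
        uint16_t idxmap = (1 << MMU_IDX_USER) | (1 << MMU_IDX_KERNEL);

        tlb_flush_by_mmuidx_all_cpus_synced(cs, idxmap);
    }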
+void tlb_flush_page(CPUState *cpu, target_ulong addr) +{ + tlb_debug("page :" TARGET_FMT_lx "\n", addr); + + if (!qemu_cpu_is_self(cpu)) { + async_run_on_cpu(cpu, tlb_flush_page_async_work, + RUN_ON_CPU_TARGET_PTR(addr)); + } else { + tlb_flush_page_async_work(cpu, RUN_ON_CPU_TARGET_PTR(addr)); + } +} + +/* As we are going to hijack the bottom bits of the page address for a + * mmuidx bit mask we need to fail to build if we can't do that + */ +QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN); + +static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu, + run_on_cpu_data data) { CPUArchState *env = cpu->env_ptr; - int i, k; - va_list argp; + target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; + target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; + unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; + int page = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + int mmu_idx; + int i; + + assert_cpu_is_self(cpu); + + tlb_debug("page:%d addr:"TARGET_FMT_lx" mmu_idx:0x%lx\n", + page, addr, mmu_idx_bitmap); - va_start(argp, addr); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + if (test_bit(mmu_idx, &mmu_idx_bitmap)) { + tlb_flush_entry(&env->tlb_table[mmu_idx][page], addr); + + /* check whether there are vltb entries that need to be flushed */ + for (i = 0; i < CPU_VTLB_SIZE; i++) { + tlb_flush_entry(&env->tlb_v_table[mmu_idx][i], addr); + } + } + } - tlb_debug("addr "TARGET_FMT_lx"\n", addr); + tb_flush_jmp_cache(cpu, addr); +} + +static void tlb_check_page_and_flush_by_mmuidx_async_work(CPUState *cpu, + run_on_cpu_data data) +{ + CPUArchState *env = cpu->env_ptr; + target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr; + target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK; + unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS; + + tlb_debug("addr:"TARGET_FMT_lx" mmu_idx: %04lx\n", addr, mmu_idx_bitmap); /* Check if we need to flush due to large pages. */ if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) { @@ -172,33 +347,80 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) 
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", env->tlb_flush_addr, env->tlb_flush_mask); - v_tlb_flush_by_mmuidx(cpu, argp); - va_end(argp); - return; + tlb_flush_by_mmuidx_async_work(cpu, + RUN_ON_CPU_HOST_INT(mmu_idx_bitmap)); + } else { + tlb_flush_page_by_mmuidx_async_work(cpu, data); } +} - addr &= TARGET_PAGE_MASK; - i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap) +{ + target_ulong addr_and_mmu_idx; - for (;;) { - int mmu_idx = va_arg(argp, int); + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap); - if (mmu_idx < 0) { - break; - } + /* This should already be page aligned */ + addr_and_mmu_idx = addr & TARGET_PAGE_MASK; + addr_and_mmu_idx |= idxmap; - tlb_debug("idx %d\n", mmu_idx); + if (!qemu_cpu_is_self(cpu)) { + async_run_on_cpu(cpu, tlb_check_page_and_flush_by_mmuidx_async_work, + RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + } else { + tlb_check_page_and_flush_by_mmuidx_async_work( + cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + } +} - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr, + uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work; + target_ulong addr_and_mmu_idx; - /* check whether there are vltb entries that need to be flushed */ - for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr); - } - } - va_end(argp); + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); - tb_flush_jmp_cache(cpu, addr); + /* This should already be page aligned */ + addr_and_mmu_idx = addr & TARGET_PAGE_MASK; + addr_and_mmu_idx |= idxmap; + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); +} + +void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu, + target_ulong addr, + uint16_t idxmap) +{ + const run_on_cpu_func fn = tlb_check_page_and_flush_by_mmuidx_async_work; + target_ulong addr_and_mmu_idx; + + tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap); + + /* This should already be page aligned */ + addr_and_mmu_idx = addr & TARGET_PAGE_MASK; + addr_and_mmu_idx |= idxmap; + + flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); + async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx)); +} + +void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +{ + const run_on_cpu_func fn = tlb_flush_page_async_work; + + flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); + fn(src, RUN_ON_CPU_TARGET_PTR(addr)); +} + +void tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr) +{ + const run_on_cpu_func fn = tlb_flush_page_async_work; + + flush_all_helper(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); + async_safe_run_on_cpu(src, fn, RUN_ON_CPU_TARGET_PTR(addr)); } /* update the TLBs so that writes to code in the virtual page 'addr' @@ -216,36 +438,84 @@ void tlb_unprotect_code(ram_addr_t ram_addr) cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); } -static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe) -{ - return (tlbe->addr_write & (TLB_INVALID_MASK|TLB_MMIO|TLB_NOTDIRTY)) == 0; -} -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, +/* + * Dirty write flag handling + * + * When the TCG code writes to a location it looks up the address in + * the TLB and uses that data to compute the final address. 
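With the page-granular broadcast helpers above in place, a target emulating a "TLB invalidate by address, all cores" style operation could look roughly like this; the helper name and the env/cs plumbing are invented for the sketch:

    #include "qemu/osdep.h"
    #include "cpu.h"
    #include "exec/exec-all.h"

    /* Illustrative helper only: real targets register this through their
     * own helper tables and use their own MMU-index conventions. */
    void helper_example_tlbi_va_broadcast(CPUArchState *env, target_ulong va)
    {
        CPUState *cs = ENV_GET_CPU(env);

        /* Drop the mapping on every vCPU. The "_synced" form queues the
         * source vCPU's flush as safe work, so no further guest code runs
         * on it until all of the queued flush work has completed. */
        tlb_flush_page_all_cpus_synced(cs, va & TARGET_PAGE_MASK);
    }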
If any of + * the lower bits of the address are set then the slow path is forced. + * There are a number of reasons to do this but for normal RAM the + * most usual is detecting writes to code regions which may invalidate + * generated code. + * + * Because we want other vCPUs to respond to changes straight away we + * update the te->addr_write field atomically. If the TLB entry has + * been changed by the vCPU in the mean time we skip the update. + * + * As this function uses atomic accesses we also need to ensure + * updates to tlb_entries follow the same access rules. We don't need + * to worry about this for oversized guests as MTTCG is disabled for + * them. + */ + +static void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, uintptr_t length) { - uintptr_t addr; +#if TCG_OVERSIZED_GUEST + uintptr_t addr = tlb_entry->addr_write; - if (tlb_is_dirty_ram(tlb_entry)) { - addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend; + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { + addr &= TARGET_PAGE_MASK; + addr += tlb_entry->addend; if ((addr - start) < length) { tlb_entry->addr_write |= TLB_NOTDIRTY; } } +#else + /* paired with atomic_mb_set in tlb_set_page_with_attrs */ + uintptr_t orig_addr = atomic_mb_read(&tlb_entry->addr_write); + uintptr_t addr = orig_addr; + + if ((addr & (TLB_INVALID_MASK | TLB_MMIO | TLB_NOTDIRTY)) == 0) { + addr &= TARGET_PAGE_MASK; + addr += atomic_read(&tlb_entry->addend); + if ((addr - start) < length) { + uintptr_t notdirty_addr = orig_addr | TLB_NOTDIRTY; + atomic_cmpxchg(&tlb_entry->addr_write, orig_addr, notdirty_addr); + } + } +#endif } -static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) +/* For atomic correctness when running MTTCG we need to use the right + * primitives when copying entries */ +static inline void copy_tlb_helper(CPUTLBEntry *d, CPUTLBEntry *s, + bool atomic_set) { - ram_addr_t ram_addr; - - ram_addr = qemu_ram_addr_from_host(ptr); - if (ram_addr == RAM_ADDR_INVALID) { - fprintf(stderr, "Bad ram pointer %p\n", ptr); - abort(); +#if TCG_OVERSIZED_GUEST + *d = *s; +#else + if (atomic_set) { + d->addr_read = s->addr_read; + d->addr_code = s->addr_code; + atomic_set(&d->addend, atomic_read(&s->addend)); + /* Pairs with flag setting in tlb_reset_dirty_range */ + atomic_mb_set(&d->addr_write, atomic_read(&s->addr_write)); + } else { + d->addr_read = s->addr_read; + d->addr_write = atomic_read(&s->addr_write); + d->addr_code = s->addr_code; + d->addend = atomic_read(&s->addend); } - return ram_addr; +#endif } +/* This is a cross vCPU call (i.e. another vCPU resetting the flags of + * the target vCPU). As such care needs to be taken that we don't + * dangerously race with another vCPU update. The only thing actually + * updated is the target TLB entry ->addr_write flags. 
+ */ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) { CPUArchState *env; @@ -283,6 +553,8 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) int i; int mmu_idx; + assert_cpu_is_self(cpu); + vaddr &= TARGET_PAGE_MASK; i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { @@ -337,11 +609,12 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, target_ulong address; target_ulong code_address; uintptr_t addend; - CPUTLBEntry *te; + CPUTLBEntry *te, *tv, tn; hwaddr iotlb, xlat, sz; unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE; int asidx = cpu_asidx_from_attrs(cpu, attrs); + assert_cpu_is_self(cpu); assert(size >= TARGET_PAGE_SIZE); if (size != TARGET_PAGE_SIZE) { tlb_add_large_page(env, vaddr, size); @@ -371,41 +644,50 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); te = &env->tlb_table[mmu_idx][index]; - /* do not discard the translation in te, evict it into a victim tlb */ - env->tlb_v_table[mmu_idx][vidx] = *te; + tv = &env->tlb_v_table[mmu_idx][vidx]; + + /* addr_write can race with tlb_reset_dirty_range */ + copy_tlb_helper(tv, te, true); + env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index]; /* refill the tlb */ env->iotlb[mmu_idx][index].addr = iotlb - vaddr; env->iotlb[mmu_idx][index].attrs = attrs; - te->addend = addend - vaddr; + + /* Now calculate the new entry */ + tn.addend = addend - vaddr; if (prot & PAGE_READ) { - te->addr_read = address; + tn.addr_read = address; } else { - te->addr_read = -1; + tn.addr_read = -1; } if (prot & PAGE_EXEC) { - te->addr_code = code_address; + tn.addr_code = code_address; } else { - te->addr_code = -1; + tn.addr_code = -1; } + + tn.addr_write = -1; if (prot & PAGE_WRITE) { if ((memory_region_is_ram(section->mr) && section->readonly) || memory_region_is_romd(section->mr)) { /* Write access calls the I/O callback. 
*/ - te->addr_write = address | TLB_MMIO; + tn.addr_write = address | TLB_MMIO; } else if (memory_region_is_ram(section->mr) && cpu_physical_memory_is_clean( memory_region_get_ram_addr(section->mr) + xlat)) { - te->addr_write = address | TLB_NOTDIRTY; + tn.addr_write = address | TLB_NOTDIRTY; } else { - te->addr_write = address; + tn.addr_write = address; } - } else { - te->addr_write = -1; } + + /* Pairs with flag setting in tlb_reset_dirty_range */ + copy_tlb_helper(te, &tn, true); + /* atomic_mb_set(&te->addr_write, write_address); */ } /* Add a new TLB entry, but without specifying the memory @@ -452,6 +734,18 @@ static void report_bad_exec(CPUState *cpu, target_ulong addr) log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP); } +static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr) +{ + ram_addr_t ram_addr; + + ram_addr = qemu_ram_addr_from_host(ptr); + if (ram_addr == RAM_ADDR_INVALID) { + error_report("Bad ram pointer %p", ptr); + abort(); + } + return ram_addr; +} + /* NOTE: this function can trigger an exception */ /* NOTE2: the returned address is not exactly the physical address: it * is actually a ram_addr_t (in system mode; the user mode emulation @@ -475,14 +769,13 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) pd = iotlbentry->addr & ~TARGET_PAGE_MASK; mr = iotlb_to_region(cpu, pd, iotlbentry->attrs); if (memory_region_is_unassigned(mr)) { - CPUClass *cc = CPU_GET_CLASS(cpu); - - if (cc->do_unassigned_access) { - cc->do_unassigned_access(cpu, addr, false, true, 0, 4); - } else { - report_bad_exec(cpu, addr); - exit(1); - } + cpu_unassigned_access(cpu, addr, false, true, 0, 4); + /* The CPU's unassigned access hook might have longjumped out + * with an exception. If it didn't (or there was no hook) then + * we can't proceed further. 
+ */ + report_bad_exec(cpu, addr); + exit(1); } p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend); return qemu_ram_addr_from_host_nofail(p); @@ -495,6 +788,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, hwaddr physaddr = iotlbentry->addr; MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); uint64_t val; + bool locked = false; physaddr = (physaddr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; @@ -503,7 +797,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, } cpu->mem_io_vaddr = addr; + + if (mr->global_locking) { + qemu_mutex_lock_iothread(); + locked = true; + } memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs); + if (locked) { + qemu_mutex_unlock_iothread(); + } + return val; } @@ -514,15 +817,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, CPUState *cpu = ENV_GET_CPU(env); hwaddr physaddr = iotlbentry->addr; MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + bool locked = false; physaddr = (physaddr & TARGET_PAGE_MASK) + addr; if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { cpu_io_recompile(cpu, retaddr); } - cpu->mem_io_vaddr = addr; cpu->mem_io_pc = retaddr; + + if (mr->global_locking) { + qemu_mutex_lock_iothread(); + locked = true; + } memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs); + if (locked) { + qemu_mutex_unlock_iothread(); + } } /* Return true if ADDR is present in the victim tlb, and has been copied @@ -538,10 +849,13 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, if (cmp == page) { /* Found entry in victim tlb, swap tlb and iotlb. */ CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index]; + + copy_tlb_helper(&tmptlb, tlb, false); + copy_tlb_helper(tlb, vtlb, true); + copy_tlb_helper(vtlb, &tmptlb, true); + CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index]; CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx]; - - tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb; tmpio = *io; *io = *vio; *vio = tmpio; return true; } diff --git a/crypto/cipher.c b/crypto/cipher.c index 9ecaff702b..5a9648942f 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -63,18 +63,14 @@ static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = { size_t qcrypto_cipher_get_block_len(QCryptoCipherAlgorithm alg) { - if (alg >= G_N_ELEMENTS(alg_key_len)) { - return 0; - } + assert(alg < G_N_ELEMENTS(alg_key_len)); return alg_block_len[alg]; } size_t qcrypto_cipher_get_key_len(QCryptoCipherAlgorithm alg) { - if (alg >= G_N_ELEMENTS(alg_key_len)) { - return 0; - } + assert(alg < G_N_ELEMENTS(alg_key_len)); return alg_key_len[alg]; } diff --git a/crypto/ivgen-essiv.c b/crypto/ivgen-essiv.c index 634de63338..cba20bde6c 100644 --- a/crypto/ivgen-essiv.c +++ b/crypto/ivgen-essiv.c @@ -48,6 +48,7 @@ static int qcrypto_ivgen_essiv_init(QCryptoIVGen *ivgen, &salt, &nhash, errp) < 0) { g_free(essiv); + g_free(salt); return -1; } diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index fdf40893aa..1e3bd2b8ca 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -42,6 +42,8 @@ CONFIG_ARM11MPCORE=y CONFIG_A9MPCORE=y CONFIG_A15MPCORE=y +CONFIG_ARM_V7M=y + CONFIG_ARM_GIC=y CONFIG_ARM_GIC_KVM=$(CONFIG_KVM) CONFIG_ARM_TIMER=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 48b07a4c91..029e95202a 100644 --- a/default-configs/i386-softmmu.mak +++ 
b/default-configs/i386-softmmu.mak @@ -59,3 +59,4 @@ CONFIG_I82801B11=y CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y +CONFIG_ACPI_VMGENID=y diff --git a/default-configs/mips64el-softmmu.mak b/default-configs/mips64el-softmmu.mak index 485e218cfc..c2ae313f47 100644 --- a/default-configs/mips64el-softmmu.mak +++ b/default-configs/mips64el-softmmu.mak @@ -10,3 +10,6 @@ CONFIG_JAZZ=y CONFIG_G364FB=y CONFIG_JAZZ_LED=y CONFIG_VT82C686=y +CONFIG_MIPS_BOSTON=y +CONFIG_FITLOADER=y +CONFIG_PCI_XILINX=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index fd96345f3c..d1d7432f74 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -59,3 +59,4 @@ CONFIG_I82801B11=y CONFIG_SMBIOS=y CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y +CONFIG_ACPI_VMGENID=y diff --git a/docs/mach-virt-graphical.cfg b/docs/mach-virt-graphical.cfg new file mode 100644 index 0000000000..0fdf6846dd --- /dev/null +++ b/docs/mach-virt-graphical.cfg @@ -0,0 +1,281 @@ +# mach-virt - VirtIO guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-aarch64 \ +# -nodefaults \ +# -readconfig mach-virt-graphical.cfg \ +# -cpu host +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through a graphical console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals, +# such as the PL011 UART, plus a PCI Express Root Bus; the +# user will then have to explicitly add further devices. +# +# The PCI Express Root Bus shows up in the guest as: +# +# 00:00.0 Host bridge +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00:01.0 Display controller +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# 03:00.0 USB controller +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the virt machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line, but we can configure the guest to use the +# same GIC version as the host. + +[machine] + type = "virt" + accel = "kvm" + gic-version = "host" + +[memory] + size = "1024" + + +# Firmware configuration +# ========================================================= +# +# There are two parts to the firmware: a read-only image +# containing the executable code, which is shared between +# guests, and a read/write variable store that is owned +# by one specific guest, exclusively, and is used to +# record information such as the UEFI boot order. +# +# For any new guest, its permanent, private variable store +# should initially be copied from the template file +# provided along with the firmware binary. 
+# +# Depending on the OS distribution you're using on the +# host, the name of the package containing the firmware +# binary and variable store template, as well as the paths +# to the files themselves, will be different. For example: +# +# Fedora +# edk2-aarch64 (pkg) +# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin) +# /usr/share/edk2/aarch64/vars-template-pflash.raw (var) +# +# RHEL +# AAVMF (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) +# +# Debian/Ubuntu +# qemu-efi (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) + +[drive "uefi-binary"] + file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "0" + readonly = "on" + +[drive "uefi-varstore"] + file = "guest_VARS.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "1" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. 
+ +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" + + +# USB controller (and input devices) +# ========================================================= +# +# We add a virtualization-friendly USB 3.0 controller and +# a USB keyboard / USB tablet combo so that graphical +# guests can be controlled appropriately. + +[device "usb"] + driver = "nec-usb-xhci" + bus = "pcie.3" + addr = "00.0" + +[device "keyboard"] + driver = "usb-kbd" + bus = "usb.0" + +[device "tablet"] + driver = "usb-tablet" + bus = "usb.0" + + +# Display controller +# ========================================================= +# +# We use virtio-gpu because the legacy VGA framebuffer is +# very troublesome on aarch64, and virtio-gpu is the only +# video device that doesn't implement it. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. + +[device "video"] + driver = "virtio-gpu" + bus = "pcie.0" + addr = "01.0" diff --git a/docs/mach-virt-serial.cfg b/docs/mach-virt-serial.cfg new file mode 100644 index 0000000000..aee9f1c5a1 --- /dev/null +++ b/docs/mach-virt-serial.cfg @@ -0,0 +1,243 @@ +# mach-virt - VirtIO guest (serial console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-aarch64 \ +# -nodefaults \ +# -readconfig mach-virt-serial.cfg \ +# -display none -serial mon:stdio \ +# -cpu host +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through the serial console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals, +# such as the PL011 UART, plus a PCI Express Root Bus; the +# user will then have to explicitly add further devices. +# +# The PCI Express Root Bus shows up in the guest as: +# +# 00:00.0 Host bridge +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# +# More information about these devices is available below. +# +# We use '-display none' to prevent QEMU from creating a +# graphical display window, which would serve no use in +# this specific configuration, and '-serial mon:stdio' to +# multiplex the guest's serial console and the QEMU monitor +# to the host's stdio; use 'Ctrl+A h' to learn how to +# switch between the two and more. + + +# Machine options +# ========================================================= +# +# We use the virt machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line, but we can configure the guest to use the +# same GIC version as the host. 
+ +[machine] + type = "virt" + accel = "kvm" + gic-version = "host" + +[memory] + size = "1024" + + +# Firmware configuration +# ========================================================= +# +# There are two parts to the firmware: a read-only image +# containing the executable code, which is shared between +# guests, and a read/write variable store that is owned +# by one specific guest, exclusively, and is used to +# record information such as the UEFI boot order. +# +# For any new guest, its permanent, private variable store +# should initially be copied from the template file +# provided along with the firmware binary. +# +# Depending on the OS distribution you're using on the +# host, the name of the package containing the firmware +# binary and variable store template, as well as the paths +# to the files themselves, will be different. For example: +# +# Fedora +# edk2-aarch64 (pkg) +# /usr/share/edk2/aarch64/QEMU_EFI-pflash.raw (bin) +# /usr/share/edk2/aarch64/vars-template-pflash.raw (var) +# +# RHEL +# AAVMF (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) +# +# Debian/Ubuntu +# qemu-efi (pkg) +# /usr/share/AAVMF/AAVMF_CODE.fd (bin) +# /usr/share/AAVMF/AAVMF_VARS.fd (var) + +[drive "uefi-binary"] + file = "/usr/share/AAVMF/AAVMF_CODE.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "0" + readonly = "on" + +[drive "uefi-varstore"] + file = "guest_VARS.fd" # CHANGE ME + format = "raw" + if = "pflash" + unit = "1" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. + +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. 
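+#
+# As an illustrative example (the id "extra-disk" and the image path
+# are made up), a further disk can later be hotplugged through the
+# QEMU monitor with something like:
+#
+#   (qemu) drive_add 0 if=none,id=extra-disk,file=/path/to/extra.qcow2
+#   (qemu) device_add scsi-hd,bus=scsi.0,drive=extra-disk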
+ +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" diff --git a/docs/migration.txt b/docs/migration.txt index 6503c17685..1b940a829b 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -161,6 +161,11 @@ include/hw/hw.h. === More about versions === +Version numbers are intended for major incompatible changes to the +migration of a device, and using them breaks backwards-migration +compatibility; in general most changes can be made by adding Subsections +(see below) or _TEST macros (see below) which won't break compatibility. + You can see that there are several version fields: - version_id: the maximum version_id supported by VMState for that device. @@ -175,6 +180,9 @@ version_id. And the function load_state_old() (if present) is able to load state from minimum_version_id_old to minimum_version_id. This function is deprecated and will be removed when no more users are left. +Saving state will always create a section with the 'version_id' value +and thus can't be loaded by any older QEMU. + === Massaging functions === Sometimes, it is not enough to be able to save the state directly @@ -292,6 +300,56 @@ save/send this state when we are in the middle of a pio operation not enabled, the values on that fields are garbage and don't need to be sent. +Using a condition function that checks a 'property' to determine whether +to send a subsection allows backwards migration compatibility when +new subsections are added. + +For example; + a) Add a new property using DEFINE_PROP_BOOL - e.g. support-foo and + default it to true. + b) Add an entry to the HW_COMPAT_ for the previous version + that sets the property to false. + c) Add a static bool support_foo function that tests the property. + d) Add a subsection with a .needed set to the support_foo function + e) (potentially) Add a pre_load that sets up a default value for 'foo' + to be used if the subsection isn't loaded. + +Now that subsection will not be generated when using an older +machine type and the migration stream will be accepted by older +QEMU versions. pre-load functions can be used to initialise state +on the newer version so that they default to suitable values +when loading streams created by older QEMU versions that do not +generate the subsection. + +In some cases subsections are added for data that had been accidentally +omitted by earlier versions; if the missing data causes the migration +process to succeed but the guest to behave badly then it may be better +to send the subsection and cause the migration to explicitly fail +with the unknown subsection error. 
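+
+Tying the property-gated pattern from steps a) to e) above to actual
+code, a minimal sketch (using the same hypothetical 'foo' field and
+'support-foo' property, not any real device) could look like:
+
+    typedef struct FooDevice {
+        DeviceState parent_obj;
+        uint32_t foo;
+        bool support_foo;
+    } FooDevice;
+
+    /* step a): the property, defaulting to true; step b) turns it off
+     * in HW_COMPAT_ for older machine types.
+     */
+    static Property foo_properties[] = {
+        DEFINE_PROP_BOOL("support-foo", FooDevice, support_foo, true),
+        DEFINE_PROP_END_OF_LIST(),
+    };
+
+    /* step c): only send the subsection when the property allows it */
+    static bool support_foo_needed(void *opaque)
+    {
+        FooDevice *s = opaque;
+
+        return s->support_foo;
+    }
+
+    /* step d): the subsection carrying the new state */
+    static const VMStateDescription vmstate_foo_subsection = {
+        .name = "foo-device/foo",
+        .version_id = 1,
+        .minimum_version_id = 1,
+        .needed = support_foo_needed,
+        .fields = (VMStateField[]) {
+            VMSTATE_UINT32(foo, FooDevice),
+            VMSTATE_END_OF_LIST()
+        }
+    };
+
+    static const VMStateDescription vmstate_foo_device = {
+        .name = "foo-device",
+        .version_id = 1,
+        .minimum_version_id = 1,
+        .fields = (VMStateField[]) {
+            /* ... existing fields ... */
+            VMSTATE_END_OF_LIST()
+        },
+        .subsections = (const VMStateDescription*[]) {
+            &vmstate_foo_subsection,
+            NULL
+        }
+    };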
If the bad behaviour only happens +with certain data values, making the subsection conditional on +the data value (rather than the machine type) allows migrations to succeed +in most cases. In general the preference is to tie the subsection to +the machine type, and allow reliable migrations, unless the behaviour +from omission of the subsection is really bad. + += Not sending existing elements = + +Sometimes members of the VMState are no longer needed; + removing them will break migration compatibility + making them version dependent and bumping the version will break backwards + migration compatibility. + +The best way is to: + a) Add a new property/compatibility/function in the same way for subsections + above. + b) replace the VMSTATE macro with the _TEST version of the macro, e.g.: + VMSTATE_UINT32(foo, barstruct) + becomes + VMSTATE_UINT32_TEST(foo, barstruct, pre_version_baz) + + Sometime in the future when we no longer care about the ancient +versions these can be killed off. + = Return path = In most migration scenarios there is only a single data path that runs @@ -482,3 +540,16 @@ request for a page that has already been sent is ignored. Duplicate requests such as this can happen as a page is sent at about the same time the destination accesses it. +=== Postcopy with hugepages === + +Postcopy now works with hugetlbfs backed memory: + a) The linux kernel on the destination must support userfault on hugepages. + b) The huge-page configuration on the source and destination VMs must be + identical; i.e. RAMBlocks on both sides must use the same page size. + c) Note that -mem-path /dev/hugepages will fall back to allocating normal + RAM if it doesn't have enough hugepages, triggering (b) to fail. + Using -mem-prealloc enforces the allocation using hugepages. + d) Care should be taken with the size of hugepage used; postcopy with 2MB + hugepages works well, however 1GB hugepages are likely to be problematic + since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, + and until the full page is transferred the destination thread is blocked. diff --git a/docs/multi-thread-tcg.txt b/docs/multi-thread-tcg.txt new file mode 100644 index 0000000000..a99b4564c6 --- /dev/null +++ b/docs/multi-thread-tcg.txt @@ -0,0 +1,350 @@ +Copyright (c) 2015-2016 Linaro Ltd. + +This work is licensed under the terms of the GNU GPL, version 2 or +later. See the COPYING file in the top-level directory. + +Introduction +============ + +This document outlines the design for multi-threaded TCG system-mode +emulation. The current user-mode emulation mirrors the thread +structure of the translated executable. Some of the work will be +applicable to both system and linux-user emulation. + +The original system-mode TCG implementation was single threaded and +dealt with multiple CPUs with simple round-robin scheduling. This +simplified a lot of things but became increasingly limited as systems +being emulated gained additional cores and per-core performance gains +for host systems started to level off. + +vCPU Scheduling +=============== + +We introduce a new running mode where each vCPU will run on its own +user-space thread. This will be enabled by default for all FE/BE +combinations that have had the required work done to support this +safely. + +In the general case of running translated code there should be no +inter-vCPU dependencies and all vCPUs should be able to run at full +speed. 
Synchronisation will only be required while accessing internal +shared data structures or when the emulated architecture requires a +coherent representation of the emulated machine state. + +Shared Data Structures +====================== + +Main Run Loop +------------- + +Even when there is no code being generated there are a number of +structures associated with the hot-path through the main run-loop. +These are associated with looking up the next translation block to +execute. These include: + + tb_jmp_cache (per-vCPU, cache of recent jumps) + tb_ctx.htable (global hash table, phys address->tb lookup) + +As TB linking only occurs when blocks are in the same page this code +is critical to performance as looking up the next TB to execute is the +most common reason to exit the generated code. + +DESIGN REQUIREMENT: Make access to lookup structures safe with +multiple reader/writer threads. Minimise any lock contention to do it. + +The hot-path avoids using locks where possible. The tb_jmp_cache is +updated with atomic accesses to ensure consistent results. The fall +back QHT based hash table is also designed for lockless lookups. Locks +are only taken when code generation is required or TranslationBlocks +have their block-to-block jumps patched. + +Global TCG State +---------------- + +We need to protect the entire code generation cycle including any post +generation patching of the translated code. This also implies a shared +translation buffer which contains code running on all cores. Any +execution path that comes to the main run loop will need to hold a +mutex for code generation. This also includes times when we need flush +code or entries from any shared lookups/caches. Structures held on a +per-vCPU basis won't need locking unless other vCPUs will need to +modify them. + +DESIGN REQUIREMENT: Add locking around all code generation and TB +patching. + +(Current solution) + +Mainly as part of the linux-user work all code generation is +serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the +place of mmap_lock() in linux-user. + +Translation Blocks +------------------ + +Currently the whole system shares a single code generation buffer +which when full will force a flush of all translations and start from +scratch again. Some operations also force a full flush of translations +including: + + - debugging operations (breakpoint insertion/removal) + - some CPU helper functions + +This is done with the async_safe_run_on_cpu() mechanism to ensure all +vCPUs are quiescent when changes are being made to shared global +structures. + +More granular translation invalidation events are typically due +to a change of the state of a physical page: + + - code modification (self modify code, patching code) + - page changes (new page mapping in linux-user mode) + +While setting the invalid flag in a TranslationBlock will stop it +being used when looked up in the hot-path there are a number of other +book-keeping structures that need to be safely cleared. + +Any TranslationBlocks which have been patched to jump directly to the +now invalid blocks need the jump patches reversing so they will return +to the C code. 
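+
+As a purely illustrative sketch (not the actual QEMU code) of the
+lock-free hot path described under "Main Run Loop" above, a per-vCPU
+jump-cache probe might look like this; on a miss the caller falls back
+to the QHT hash table and only takes tb_lock() if a new translation is
+required:
+
+    static TranslationBlock *jump_cache_probe(CPUState *cpu, target_ulong pc)
+    {
+        unsigned int hash = tb_jmp_cache_hash_func(pc);
+        TranslationBlock *tb = atomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+
+        if (tb && tb->pc == pc) {
+            return tb;    /* hot path: no locks taken at all */
+        }
+        return NULL;      /* miss: fall back to the shared hash table */
+    }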
+
+There are a number of look-up caches that need to be properly updated
+including the:
+
+  - jump lookup cache
+  - the physical-to-tb lookup hash table
+  - the global page table
+
+The global page table (l1_map) provides a multi-level look-up for
+PageDesc structures which contain pointers to the start of a linked
+list of all Translation Blocks in that page (see page_next).
+
+Both the jump patching and the page cache involve linked lists that
+the invalidated TranslationBlock needs to be removed from.
+
+DESIGN REQUIREMENT: Safely handle invalidation of TBs
+  - safely patch/revert direct jumps
+  - remove central PageDesc lookup entries
+  - ensure lookup caches/hashes are safely updated
+
+(Current solution)
+
+The direct jumps themselves are updated atomically by the TCG
+tb_set_jmp_target() code. Modifications to the linked lists that allow
+searching for linked pages are done under the protection of the
+tb_lock().
+
+The global page table is protected by the tb_lock() in system-mode and
+mmap_lock() in linux-user mode.
+
+The lookup caches are updated atomically and the lookup hash uses QHT
+which is designed for concurrent safe lookup.
+
+
+Memory maps and TLBs
+--------------------
+
+The memory handling code is fairly critical to the speed of memory
+access in the emulated system. The SoftMMU code is designed so the
+hot-path can be handled entirely within translated code. This is
+handled with a per-vCPU TLB structure which once populated will allow
+a series of accesses to the page to occur without exiting the
+translated code. It is possible to set flags in the TLB address which
+will ensure the slow-path is taken for each access. This can be done
+to support:
+
+  - Memory regions (dividing up access to PIO, MMIO and RAM)
+  - Dirty page tracking (for code gen, SMC detection, migration and display)
+  - Virtual TLB (for translating guest address->real address)
+
+When a vCPU's TLB tables are updated by a vCPU thread other than its
+own we need to ensure it is done in a safe way so no inconsistent
+state is seen by the vCPU thread.
+
+Some operations require updating a number of vCPUs' TLBs at the same
+time in a synchronised manner.
+
+DESIGN REQUIREMENTS:
+
+  - TLB Flush All/Page
+    - can be across-vCPUs
+    - cross vCPU TLB flush may need other vCPU brought to halt
+    - change may need to be visible to the calling vCPU immediately
+  - TLB Flag Update
+    - usually cross-vCPU
+    - want change to be visible as soon as possible
+  - TLB Update (update a CPUTLBEntry, via tlb_set_page_with_attrs)
+    - This is a per-vCPU table - by definition can't race
+    - updated by its own thread when the slow-path is forced
+
+(Current solution)
+
+We have updated cputlb.c so that cross-vCPU operations are deferred
+with async_run_on_cpu(), which ensures each vCPU sees a coherent state
+when it next runs its work (in a few instructions' time).
+
+A new set of operations (tlb_flush_*_all_cpus) takes an additional
+flag which, when set, will force synchronisation by setting the source
+vCPU's work as "safe work" and exiting the cpu run loop. This ensures
+that by the time execution restarts all flush operations have
+completed.
+
+TLB flag updates are all done atomically and are also protected by the
+tb_lock() which is used by the functions that update the TLB in bulk.
+
+(Known limitation)
+
+Not really a limitation but the wait mechanism is overly strict for
+some architectures which only need flushes completed by a barrier
+instruction. This could be a future optimisation.
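+
+A minimal sketch of the cputlb.c deferral pattern described in the
+current solution above (illustrative only; the real code also handles
+the synchronised *_synced variants) might look like:
+
+    static void flush_page_async_work(CPUState *cpu, run_on_cpu_data data)
+    {
+        tlb_flush_page(cpu, (target_ulong)data.target_ptr);
+    }
+
+    static void flush_page_on_vcpu(CPUState *cpu, target_ulong addr)
+    {
+        if (cpu == current_cpu) {
+            tlb_flush_page(cpu, addr);          /* own TLB: flush now */
+        } else {
+            /* defer the flush to the target vCPU's own context */
+            async_run_on_cpu(cpu, flush_page_async_work,
+                             RUN_ON_CPU_TARGET_PTR(addr));
+        }
+    }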
+ +Emulated hardware state +----------------------- + +Currently thanks to KVM work any access to IO memory is automatically +protected by the global iothread mutex, also known as the BQL (Big +Qemu Lock). Any IO region that doesn't use global mutex is expected to +do its own locking. + +However IO memory isn't the only way emulated hardware state can be +modified. Some architectures have model specific registers that +trigger hardware emulation features. Generally any translation helper +that needs to update more than a single vCPUs of state should take the +BQL. + +As the BQL, or global iothread mutex is shared across the system we +push the use of the lock as far down into the TCG code as possible to +minimise contention. + +(Current solution) + +MMIO access automatically serialises hardware emulation by way of the +BQL. Currently ARM targets serialise all ARM_CP_IO register accesses +and also defer the reset/startup of vCPUs to the vCPU context by way +of async_run_on_cpu(). + +Updates to interrupt state are also protected by the BQL as they can +often be cross vCPU. + +Memory Consistency +================== + +Between emulated guests and host systems there are a range of memory +consistency models. Even emulating weakly ordered systems on strongly +ordered hosts needs to ensure things like store-after-load re-ordering +can be prevented when the guest wants to. + +Memory Barriers +--------------- + +Barriers (sometimes known as fences) provide a mechanism for software +to enforce a particular ordering of memory operations from the point +of view of external observers (e.g. another processor core). They can +apply to any memory operations as well as just loads or stores. + +The Linux kernel has an excellent write-up on the various forms of +memory barrier and the guarantees they can provide [1]. + +Barriers are often wrapped around synchronisation primitives to +provide explicit memory ordering semantics. However they can be used +by themselves to provide safe lockless access by ensuring for example +a change to a signal flag will only be visible once the changes to +payload are. + +DESIGN REQUIREMENT: Add a new tcg_memory_barrier op + +This would enforce a strong load/store ordering so all loads/stores +complete at the memory barrier. On single-core non-SMP strongly +ordered backends this could become a NOP. + +Aside from explicit standalone memory barrier instructions there are +also implicit memory ordering semantics which comes with each guest +memory access instruction. For example all x86 load/stores come with +fairly strong guarantees of sequential consistency where as ARM has +special variants of load/store instructions that imply acquire/release +semantics. + +In the case of a strongly ordered guest architecture being emulated on +a weakly ordered host the scope for a heavy performance impact is +quite high. + +DESIGN REQUIREMENTS: Be efficient with use of memory barriers + - host systems with stronger implied guarantees can skip some barriers + - merge consecutive barriers to the strongest one + +(Current solution) + +The system currently has a tcg_gen_mb() which will add memory barrier +operations if code generation is being done in a parallel context. The +tcg_optimize() function attempts to merge barriers up to their +strongest form before any load/store operations. The solution was +originally developed and tested for linux-user based systems. All +backends have been converted to emit fences when required. 
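+
+As an illustrative fragment (the helper name is invented; real front
+ends fold this into their normal load emitters), a guest load-acquire
+could be translated as:
+
+    static void gen_load_acquire(TCGv_i64 val, TCGv addr, int mem_idx)
+    {
+        tcg_gen_qemu_ld_i64(val, addr, mem_idx, MO_TEQ);
+        /* later loads/stores must not be reordered before this load */
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+    }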
So far the +following front-ends have been updated to emit fences when required: + + - target-i386 + - target-arm + - target-aarch64 + - target-alpha + - target-mips + +Memory Control and Maintenance +------------------------------ + +This includes a class of instructions for controlling system cache +behaviour. While QEMU doesn't model cache behaviour these instructions +are often seen when code modification has taken place to ensure the +changes take effect. + +Synchronisation Primitives +-------------------------- + +There are two broad types of synchronisation primitives found in +modern ISAs: atomic instructions and exclusive regions. + +The first type offer a simple atomic instruction which will guarantee +some sort of test and conditional store will be truly atomic w.r.t. +other cores sharing access to the memory. The classic example is the +x86 cmpxchg instruction. + +The second type offer a pair of load/store instructions which offer a +guarantee that an region of memory has not been touched between the +load and store instructions. An example of this is ARM's ldrex/strex +pair where the strex instruction will return a flag indicating a +successful store only if no other CPU has accessed the memory region +since the ldrex. + +Traditionally TCG has generated a series of operations that work +because they are within the context of a single translation block so +will have completed before another CPU is scheduled. However with +the ability to have multiple threads running to emulate multiple CPUs +we will need to explicitly expose these semantics. + +DESIGN REQUIREMENTS: + - Support classic atomic instructions + - Support load/store exclusive (or load link/store conditional) pairs + - Generic enough infrastructure to support all guest architectures +CURRENT OPEN QUESTIONS: + - How problematic is the ABA problem in general? + +(Current solution) + +The TCG provides a number of atomic helpers (tcg_gen_atomic_*) which +can be used directly or combined to emulate other instructions like +ARM's ldrex/strex instructions. While they are susceptible to the ABA +problem so far common guests have not implemented patterns where +this may be a problem - typically presenting a locking ABI which +assumes cmpxchg like semantics. + +The code also includes a fall-back for cases where multi-threaded TCG +ops can't work (e.g. guest atomic width > host atomic width). In this +case an EXCP_ATOMIC exit occurs and the instruction is emulated with +an exclusive lock which ensures all emulation is serialised. + +While the atomic helpers look good enough for now there may be a need +to look at solutions that can more closely model the guest +architectures semantics. + +========== + +[1] https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/Documentation/memory-barriers.txt diff --git a/docs/q35-chipset.cfg b/docs/q35-chipset.cfg deleted file mode 100644 index e4ddb7d9cc..0000000000 --- a/docs/q35-chipset.cfg +++ /dev/null @@ -1,152 +0,0 @@ -################################################################ -# -# qemu -M q35 creates a bare machine with just the very essential -# chipset devices being present: -# -# 00.0 - Host bridge -# 1f.0 - ISA bridge / LPC -# 1f.2 - SATA (AHCI) controller -# 1f.3 - SMBus controller -# -# This config file documents the other devices and how they are -# created. You can simply use "-readconfig $thisfile" to create -# them all. Here is a overview: -# -# 19.0 - Ethernet controller (not created, our e1000 emulation -# doesn't emulate the ich9 device). 
-# 1a.* - USB Controller #2 (ehci + uhci companions) -# 1b.0 - HD Audio Controller -# 1c.* - PCI Express Ports -# 1d.* - USB Controller #1 (ehci + uhci companions, -# "qemu -M q35 -usb" creates these too) -# 1e.0 - PCI Bridge -# - -[device "ich9-ehci-2"] - driver = "ich9-usb-ehci2" - multifunction = "on" - bus = "pcie.0" - addr = "1a.7" - -[device "ich9-uhci-4"] - driver = "ich9-usb-uhci4" - multifunction = "on" - bus = "pcie.0" - addr = "1a.0" - masterbus = "ich9-ehci-2.0" - firstport = "0" - -[device "ich9-uhci-5"] - driver = "ich9-usb-uhci5" - multifunction = "on" - bus = "pcie.0" - addr = "1a.1" - masterbus = "ich9-ehci-2.0" - firstport = "2" - -[device "ich9-uhci-6"] - driver = "ich9-usb-uhci6" - multifunction = "on" - bus = "pcie.0" - addr = "1a.2" - masterbus = "ich9-ehci-2.0" - firstport = "4" - - -[device "ich9-hda-audio"] - driver = "ich9-intel-hda" - bus = "pcie.0" - addr = "1b.0" - - -[device "ich9-pcie-port-1"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.0" - port = "1" - chassis = "1" - -[device "ich9-pcie-port-2"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.1" - port = "2" - chassis = "2" - -[device "ich9-pcie-port-3"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.2" - port = "3" - chassis = "3" - -[device "ich9-pcie-port-4"] - driver = "ioh3420" - multifunction = "on" - bus = "pcie.0" - addr = "1c.3" - port = "4" - chassis = "4" - -## -# Example PCIe switch with two downstream ports -# -#[device "pcie-switch-upstream-port-1"] -# driver = "x3130-upstream" -# bus = "ich9-pcie-port-4" -# addr = "00.0" -# -#[device "pcie-switch-downstream-port-1-1"] -# driver = "xio3130-downstream" -# multifunction = "on" -# bus = "pcie-switch-upstream-port-1" -# addr = "00.0" -# port = "1" -# chassis = "5" -# -#[device "pcie-switch-downstream-port-1-2"] -# driver = "xio3130-downstream" -# multifunction = "on" -# bus = "pcie-switch-upstream-port-1" -# addr = "00.1" -# port = "1" -# chassis = "6" - -[device "ich9-ehci-1"] - driver = "ich9-usb-ehci1" - multifunction = "on" - bus = "pcie.0" - addr = "1d.7" - -[device "ich9-uhci-1"] - driver = "ich9-usb-uhci1" - multifunction = "on" - bus = "pcie.0" - addr = "1d.0" - masterbus = "ich9-ehci-1.0" - firstport = "0" - -[device "ich9-uhci-2"] - driver = "ich9-usb-uhci2" - multifunction = "on" - bus = "pcie.0" - addr = "1d.1" - masterbus = "ich9-ehci-1.0" - firstport = "2" - -[device "ich9-uhci-3"] - driver = "ich9-usb-uhci3" - multifunction = "on" - bus = "pcie.0" - addr = "1d.2" - masterbus = "ich9-ehci-1.0" - firstport = "4" - - -[device "ich9-pci-bridge"] - driver = "i82801b11-bridge" - bus = "pcie.0" - addr = "1e.0" diff --git a/docs/q35-emulated.cfg b/docs/q35-emulated.cfg new file mode 100644 index 0000000000..c6416d6545 --- /dev/null +++ b/docs/q35-emulated.cfg @@ -0,0 +1,288 @@ +# q35 - Emulated guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-emulated.cfg +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of emulated devices that +# closely resembles that of a physical machine, and will be +# accessed through a graphical console. 
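+#
+# A hedged convenience note (it relies only on the drive ids "disk"
+# and "optical-disk" defined further down): the lines marked CHANGE ME
+# can usually be overridden from the command line instead of editing
+# this file, e.g.
+#
+# $ qemu-system-x86_64 \
+#     -nodefaults \
+#     -readconfig q35-emulated.cfg \
+#     -set drive.disk.file=/path/to/guest.qcow2 \
+#     -set drive.optical-disk.file=/path/to/install.iso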
+# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of devices that +# are pretty much guaranteed to be present in every single +# physical machine based on q35, more specifically: +# +# 00:01.0 VGA compatible controller +# 00:19.0 Ethernet controller +# 00:1a.* USB controller (#2) +# 00:1b.0 Audio device +# 00:1c.* PCI bridge (PCI Express Root Ports) +# 00:1d.* USB Controller (#1) +# 00:1e.0 PCI bridge (legacy PCI bridge) +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. +# +# Unfortunately, there is no way to configure the CPU model +# in this file, so it will have to be provided on the +# command line. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We add four PCI Express Root Ports, all sharing the same +# slot on the PCI Express Root Bus. These ports support +# hotplug. + +[device "ich9-pcie-port-1"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + +[device "ich9-pcie-port-2"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "ich9-pcie-port-3"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "ich9-pcie-port-4"] + driver = "ioh3420" + multifunction = "on" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + + +# PCI bridge (legacy PCI bridge) +# ========================================================= +# +# This bridge can be used to build an independent topology +# for legacy PCI devices. PCI Express devices should be +# plugged into PCI Express slots instead, so ideally there +# will be no devices connected to this bridge. + +[device "ich9-pci-bridge"] + driver = "i82801b11-bridge" + bus = "pcie.0" + addr = "1e.0" + + +# SATA storage +# ========================================================= +# +# An implicit SATA controller is created automatically for +# every single q35 guest; here we create a disk, backed by +# a qcow2 disk image on the host's filesystem, and attach +# it to that controller so that the guest can use it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. 
+ +[device "sata-disk"] + driver = "ide-hd" + bus = "ide.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "sata-optical-disk"] + driver = "ide-cd" + bus = "ide.1" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# USB controller (#1) +# ========================================================= +# +# EHCI controller + UHCI companion controllers. + +[device "ich9-ehci-1"] + driver = "ich9-usb-ehci1" + multifunction = "on" + bus = "pcie.0" + addr = "1d.7" + +[device "ich9-uhci-1"] + driver = "ich9-usb-uhci1" + multifunction = "on" + bus = "pcie.0" + addr = "1d.0" + masterbus = "ich9-ehci-1.0" + firstport = "0" + +[device "ich9-uhci-2"] + driver = "ich9-usb-uhci2" + multifunction = "on" + bus = "pcie.0" + addr = "1d.1" + masterbus = "ich9-ehci-1.0" + firstport = "2" + +[device "ich9-uhci-3"] + driver = "ich9-usb-uhci3" + multifunction = "on" + bus = "pcie.0" + addr = "1d.2" + masterbus = "ich9-ehci-1.0" + firstport = "4" + + +# USB controller (#2) +# ========================================================= +# +# EHCI controller + UHCI companion controllers. + +[device "ich9-ehci-2"] + driver = "ich9-usb-ehci2" + multifunction = "on" + bus = "pcie.0" + addr = "1a.7" + +[device "ich9-uhci-4"] + driver = "ich9-usb-uhci4" + multifunction = "on" + bus = "pcie.0" + addr = "1a.0" + masterbus = "ich9-ehci-2.0" + firstport = "0" + +[device "ich9-uhci-5"] + driver = "ich9-usb-uhci5" + multifunction = "on" + bus = "pcie.0" + addr = "1a.1" + masterbus = "ich9-ehci-2.0" + firstport = "2" + +[device "ich9-uhci-6"] + driver = "ich9-usb-uhci6" + multifunction = "on" + bus = "pcie.0" + addr = "1a.2" + masterbus = "ich9-ehci-2.0" + firstport = "4" + + +# Ethernet controller +# ========================================================= +# +# We add a Gigabit Ethernet interface to the guest; on the +# host side, we take advantage of user networking so that +# the QEMU process doesn't require any additional +# privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "e1000" + netdev = "hostnet" + bus = "pcie.0" + addr = "19.0" + + +# VGA compatible controller +# ========================================================= +# +# We use stdvga instead of Cirrus as it supports more video +# modes and is closer to what actual hardware looks like. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. + +[device "video"] + driver = "VGA" + bus = "pcie.0" + addr = "01.0" + + +# Audio device +# ========================================================= +# +# The sound card is a legacy PCI device that is plugged +# directly into the PCI Express Root Bus. 
+ +[device "ich9-hda-audio"] + driver = "ich9-intel-hda" + bus = "pcie.0" + addr = "1b.0" + +[device "ich9-hda-duplex"] + driver = "hda-duplex" + bus = "ich9-hda-audio.0" + cad = "0" diff --git a/docs/q35-virtio-graphical.cfg b/docs/q35-virtio-graphical.cfg new file mode 100644 index 0000000000..28bde2fc57 --- /dev/null +++ b/docs/q35-virtio-graphical.cfg @@ -0,0 +1,248 @@ +# q35 - VirtIO guest (graphical console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-virtio-graphical.cfg +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through a graphical console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00:01.0 VGA compatible controller +# 00:1b.0 Audio device +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# 03:00.0 USB controller +# +# More information about these devices is available below. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. 
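+#
+# As an illustrative example (the id "extra-scsi" is made up), a
+# device can later be hotplugged into one of the spare ports from the
+# QEMU monitor with something like:
+#
+#   (qemu) device_add virtio-scsi-pci,bus=pcie.5,id=extra-scsi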
+ +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" + + +# USB controller (and input devices) +# ========================================================= +# +# We add a virtualization-friendly USB 3.0 controller and +# a USB tablet so that graphical guests can be controlled +# appropriately. A USB keyboard is not needed, as q35 +# guests get a PS/2 one added automatically. + +[device "usb"] + driver = "nec-usb-xhci" + bus = "pcie.3" + addr = "00.0" + +[device "tablet"] + driver = "usb-tablet" + bus = "usb.0" + + +# VGA compatible controller +# ========================================================= +# +# We plug the QXL video card directly into the PCI Express +# Root Bus as it is a legacy PCI device; this way, we can +# reduce the number of PCI Express controllers in the +# guest. +# +# If you're running the guest on a remote, potentially +# headless host, you will probably want to append something +# like +# +# -display vnc=127.0.0.1:0 +# +# to the command line in order to prevent QEMU from +# creating a graphical display window on the host and +# enable remote access instead. 
+ +[device "video"] + driver = "qxl-vga" + bus = "pcie.0" + addr = "01.0" + + +# Audio device +# ========================================================= +# +# Like the video card, the sound card is a legacy PCI +# device and as such can be plugged directly into the PCI +# Express Root Bus. + +[device "sound"] + driver = "ich9-intel-hda" + bus = "pcie.0" + addr = "1b.0" + +[device "duplex"] + driver = "hda-duplex" + bus = "sound.0" + cad = "0" diff --git a/docs/q35-virtio-serial.cfg b/docs/q35-virtio-serial.cfg new file mode 100644 index 0000000000..c33c9cc07a --- /dev/null +++ b/docs/q35-virtio-serial.cfg @@ -0,0 +1,193 @@ +# q35 - VirtIO guest (serial console) +# ========================================================= +# +# Usage: +# +# $ qemu-system-x86_64 \ +# -nodefaults \ +# -readconfig q35-virtio-serial.cfg \ +# -display none -serial mon:stdio +# +# You will probably need to tweak the lines marked as +# CHANGE ME before being able to use this configuration! +# +# The guest will have a selection of VirtIO devices +# tailored towards optimal performance with modern guests, +# and will be accessed through the serial console. +# +# --------------------------------------------------------- +# +# Using -nodefaults is required to have full control over +# the virtual hardware: when it's specified, QEMU will +# populate the board with only the builtin peripherals +# plus a small selection of core PCI devices and +# controllers; the user will then have to explicitly add +# further devices. +# +# The core PCI devices show up in the guest as: +# +# 00:00.0 Host bridge +# 00:1f.0 ISA bridge / LPC +# 00:1f.2 SATA (AHCI) controller +# 00:1f.3 SMBus controller +# +# This configuration file adds a number of other useful +# devices, more specifically: +# +# 00.1c.* PCI bridge (PCI Express Root Ports) +# 01:00.0 SCSI storage controller +# 02:00.0 Ethernet controller +# +# More information about these devices is available below. +# +# We use '-display none' to prevent QEMU from creating a +# graphical display window, which would serve no use in +# this specific configuration, and '-serial mon:stdio' to +# multiplex the guest's serial console and the QEMU monitor +# to the host's stdio; use 'Ctrl+A h' to learn how to +# switch between the two and more. + + +# Machine options +# ========================================================= +# +# We use the q35 machine type and enable KVM acceleration +# for better performance. +# +# Using less than 1 GiB of memory is probably not going to +# yield good performance in the guest, and might even lead +# to obscure boot issues in some cases. + +[machine] + type = "q35" + accel = "kvm" + +[memory] + size = "1024" + + +# PCI bridge (PCI Express Root Ports) +# ========================================================= +# +# We create eight PCI Express Root Ports, and we plug them +# all into separate functions of the same slot. Some of +# them will be used by devices, the rest will remain +# available for hotplug. 
+ +[device "pcie.1"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.0" + port = "1" + chassis = "1" + multifunction = "on" + +[device "pcie.2"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.1" + port = "2" + chassis = "2" + +[device "pcie.3"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.2" + port = "3" + chassis = "3" + +[device "pcie.4"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.3" + port = "4" + chassis = "4" + +[device "pcie.5"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.4" + port = "5" + chassis = "5" + +[device "pcie.6"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.5" + port = "6" + chassis = "6" + +[device "pcie.7"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.6" + port = "7" + chassis = "7" + +[device "pcie.8"] + driver = "pcie-root-port" + bus = "pcie.0" + addr = "1c.7" + port = "8" + chassis = "8" + + +# SCSI storage controller (and storage) +# ========================================================= +# +# We use virtio-scsi here so that we can (hot)plug a large +# number of disks without running into issues; a SCSI disk, +# backed by a qcow2 disk image on the host's filesystem, is +# attached to it. +# +# We also create an optical disk, mostly for installation +# purposes: once the guest OS has been succesfully +# installed, the guest will no longer boot from optical +# media. If you don't want, or no longer want, to have an +# optical disk in the guest you can safely comment out +# all relevant sections below. + +[device "scsi"] + driver = "virtio-scsi-pci" + bus = "pcie.1" + addr = "00.0" + +[device "scsi-disk"] + driver = "scsi-hd" + bus = "scsi.0" + drive = "disk" + bootindex = "1" + +[drive "disk"] + file = "guest.qcow2" # CHANGE ME + format = "qcow2" + if = "none" + +[device "scsi-optical-disk"] + driver = "scsi-cd" + bus = "scsi.0" + drive = "optical-disk" + bootindex = "2" + +[drive "optical-disk"] + file = "install.iso" # CHANGE ME + format = "raw" + if = "none" + + +# Ethernet controller +# ========================================================= +# +# We use virtio-net for improved performance over emulated +# hardware; on the host side, we take advantage of user +# networking so that the QEMU process doesn't require any +# additional privileges. + +[netdev "hostnet"] + type = "user" + +[device "net"] + driver = "virtio-net-pci" + netdev = "hostnet" + bus = "pcie.2" + addr = "00.0" diff --git a/docs/replay.txt b/docs/replay.txt index 03e193193f..486c1e0e9d 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -225,3 +225,10 @@ recording the virtual machine this filter puts all packets coming from the outer world into the log. In replay mode packets from the log are injected into the network device. All interactions with network backend in replay mode are disabled. + +Audio devices +------------- + +Audio data is recorded and replay automatically. The command line for recording +and replaying must contain identical specifications of audio hardware, e.g.: + -soundhw ac97 diff --git a/docs/specs/vmgenid.txt b/docs/specs/vmgenid.txt new file mode 100644 index 0000000000..aa9f518676 --- /dev/null +++ b/docs/specs/vmgenid.txt @@ -0,0 +1,245 @@ +VIRTUAL MACHINE GENERATION ID +============================= + +Copyright (C) 2016 Red Hat, Inc. +Copyright (C) 2017 Skyport Systems, Inc. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. 
+ +=== + +The VM generation ID (vmgenid) device is an emulated device which +exposes a 128-bit, cryptographically random, integer value identifier, +referred to as a Globally Unique Identifier, or GUID. + +This allows management applications (e.g. libvirt) to notify the guest +operating system when the virtual machine is executed with a different +configuration (e.g. snapshot execution or creation from a template). The +guest operating system notices the change, and is then able to react as +appropriate by marking its copies of distributed databases as dirty, +re-initializing its random number generator etc. + + +Requirements +------------ + +These requirements are extracted from the "How to implement virtual machine +generation ID support in a virtualization platform" section of the +specification, dated August 1, 2012. + + +The document may be found on the web at: + http://go.microsoft.com/fwlink/?LinkId=260709 + +R1a. The generation ID shall live in an 8-byte aligned buffer. + +R1b. The buffer holding the generation ID shall be in guest RAM, ROM, or device + MMIO range. + +R1c. The buffer holding the generation ID shall be kept separate from areas + used by the operating system. + +R1d. The buffer shall not be covered by an AddressRangeMemory or + AddressRangeACPI entry in the E820 or UEFI memory map. + +R1e. The generation ID shall not live in a page frame that could be mapped with + caching disabled. (In other words, regardless of whether the generation ID + lives in RAM, ROM or MMIO, it shall only be mapped as cacheable.) + +R2 to R5. [These AML requirements are isolated well enough in the Microsoft + specification for us to simply refer to them here.] + +R6. The hypervisor shall expose a _HID (hardware identifier) object in the + VMGenId device's scope that is unique to the hypervisor vendor. + + +QEMU Implementation +------------------- + +The above-mentioned specification does not dictate which ACPI descriptor table +will contain the VM Generation ID device. Other implementations (Hyper-V and +Xen) put it in the main descriptor table (Differentiated System Description +Table or DSDT). For ease of debugging and implementation, we have decided to +put it in its own Secondary System Description Table, or SSDT. 
+ +The following is a dump of the contents from a running system: + +# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT + +Intel ACPI Component Architecture +ASL+ Optimizing Compiler version 20150717-64 +Copyright (c) 2000 - 2015 Intel Corporation + +Reading ACPI table from file /sys/firmware/acpi/tables/SSDT - Length +00000198 (0x0000C6) +ACPI: SSDT 0x0000000000000000 0000C6 (v01 BOCHS VMGENID 00000001 BXPC +00000001) +Acpi table [SSDT] successfully installed and loaded +Pass 1 parse of [SSDT] +Pass 2 parse of [SSDT] +Parsing Deferred Opcodes (Methods/Buffers/Packages/Regions) + +Parsing completed +Disassembly completed +ASL Output: ./SSDT.dsl - 1631 bytes +# cat SSDT.dsl +/* + * Intel ACPI Component Architecture + * AML/ASL+ Disassembler version 20150717-64 + * Copyright (c) 2000 - 2015 Intel Corporation + * + * Disassembling to symbolic ASL+ operators + * + * Disassembly of /sys/firmware/acpi/tables/SSDT, Sun Feb 5 00:19:37 2017 + * + * Original Table Header: + * Signature "SSDT" + * Length 0x000000CA (202) + * Revision 0x01 + * Checksum 0x4B + * OEM ID "BOCHS " + * OEM Table ID "VMGENID" + * OEM Revision 0x00000001 (1) + * Compiler ID "BXPC" + * Compiler Version 0x00000001 (1) + */ +DefinitionBlock ("/sys/firmware/acpi/tables/SSDT.aml", "SSDT", 1, "BOCHS ", +"VMGENID", 0x00000001) +{ + Name (VGIA, 0x07FFF000) + Scope (\_SB) + { + Device (VGEN) + { + Name (_HID, "QEMUVGID") // _HID: Hardware ID + Name (_CID, "VM_Gen_Counter") // _CID: Compatible ID + Name (_DDN, "VM_Gen_Counter") // _DDN: DOS Device Name + Method (_STA, 0, NotSerialized) // _STA: Status + { + Local0 = 0x0F + If ((VGIA == Zero)) + { + Local0 = Zero + } + + Return (Local0) + } + + Method (ADDR, 0, NotSerialized) + { + Local0 = Package (0x02) {} + Index (Local0, Zero) = (VGIA + 0x28) + Index (Local0, One) = Zero + Return (Local0) + } + } + } + + Method (\_GPE._E05, 0, NotSerialized) // _Exx: Edge-Triggered GPE + { + Notify (\_SB.VGEN, 0x80) // Status Change + } +} + + +Design Details: +--------------- + +Requirements R1a through R1e dictate that the memory holding the +VM Generation ID must be allocated and owned by the guest firmware, +in this case BIOS or UEFI. However, to be useful, QEMU must be able to +change the contents of the memory at runtime, specifically when starting a +backed-up or snapshotted image. In order to do this, QEMU must know the +address that has been allocated. + +The mechanism chosen for this memory sharing is writeable fw_cfg blobs. +These are data object that are visible to both QEMU and guests, and are +addressable as sequential files. + +More information about fw_cfg can be found in "docs/specs/fw_cfg.txt" + +Two fw_cfg blobs are used in this case: + +/etc/vmgenid_guid - contains the actual VM Generation ID GUID + - read-only to the guest +/etc/vmgenid_addr - contains the address of the downloaded vmgenid blob + - writeable by the guest + + +QEMU sends the following commands to the guest at startup: + +1. Allocate memory for vmgenid_guid fw_cfg blob. +2. Write the address of vmgenid_guid into the SSDT (VGIA ACPI variable as + shown above in the iasl dump). Note that this change is not propagated + back to QEMU. +3. Write the address of vmgenid_guid back to QEMU's copy of vmgenid_addr + via the fw_cfg DMA interface. + +After step 3, QEMU is able to update the contents of vmgenid_guid at will. + +Since BIOS or UEFI does not necessarily run when we wish to change the GUID, +the value of VGIA is persisted via the VMState mechanism. 
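+
+As a simplified sketch of what this enables (illustrative only; the
+actual logic lives in hw/acpi/vmgenid.c, and we assume here that the
+address handed back already points at the GUID field inside the
+allocated page), updating the GUID then amounts to:
+
+    static void vmgenid_patch_guid(hwaddr guid_addr, const QemuUUID *guid)
+    {
+        /* the real device stores the GUID in little-endian form */
+        cpu_physical_memory_write(guid_addr, guid->data, sizeof(guid->data));
+
+        /* the change must then be signalled to the guest; the
+         * notification ends up in the \_GPE._E05 handler shown in the
+         * dump above.
+         */
+    }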
+ +As spelled out in the specification, any change to the GUID executes an +ACPI notification. The exact handler to use is not specified, so the vmgenid +device uses the first unused one: \_GPE._E05. + + +Endian-ness Considerations: +--------------------------- + +Although not specified in Microsoft's document, it is assumed that the +device is expected to use little-endian format. + +All GUID passed in via command line or monitor are treated as big-endian. +GUID values displayed via monitor are shown in big-endian format. + + +GUID Storage Format: +-------------------- + +In order to implement an OVMF "SDT Header Probe Suppressor", the contents of +the vmgenid_guid fw_cfg blob are not simply a 128-bit GUID. There is also +significant padding in order to align and fill a memory page, as shown in the +following diagram: + ++----------------------------------+ +| SSDT with OEM Table ID = VMGENID | ++----------------------------------+ +| ... | TOP OF PAGE +| VGIA dword object ---------------|-----> +---------------------------+ +| ... | | fw-allocated array for | +| _STA method referring to VGIA | | "etc/vmgenid_guid" | +| ... | +---------------------------+ +| ADDR method referring to VGIA | | 0: OVMF SDT Header probe | +| ... | | suppressor | ++----------------------------------+ | 36: padding for 8-byte | + | alignment | + | 40: GUID | + | 56: padding to page size | + +---------------------------+ + END OF PAGE + + +Device Usage: +------------- + +The device has one property, which may be only be set using the command line: + + guid - sets the value of the GUID. A special value "auto" instructs + QEMU to generate a new random GUID. + +For example: + + QEMU -device vmgenid,guid="324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87" + QEMU -device vmgenid,guid=auto + +The property may be queried via QMP/HMP: + + (QEMU) query-vm-generation-id + {"return": {"guid": "324e6eaf-d1d1-4bf6-bf41-b9bb6c91fb87"}} + +Setting of this parameter is intentionally left out from the QMP/HMP +interfaces. There are no known use cases for changing the GUID once QEMU is +running, and adding this capability would greatly increase the complexity. diff --git a/dtc b/dtc -Subproject 65cc4d2748a2c2e6f27f1cf39e07a5dbabd80eb +Subproject ec02b34c05be04f249ffaaca4b666f5246877de @@ -45,6 +45,12 @@ #include "exec/address-spaces.h" #include "sysemu/xen-mapcache.h" #include "trace-root.h" + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include <fcntl.h> +#include <linux/falloc.h> +#endif + #endif #include "exec/cpu-all.h" #include "qemu/rcu_queue.h" @@ -1518,6 +1524,19 @@ size_t qemu_ram_pagesize(RAMBlock *rb) return rb->page_size; } +/* Returns the largest size of page in use */ +size_t qemu_ram_pagesize_largest(void) +{ + RAMBlock *block; + size_t largest = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + largest = MAX(largest, qemu_ram_pagesize(block)); + } + + return largest; +} + static int memory_try_enable_merging(void *addr, size_t len) { if (!machine_mem_merge(current_machine)) { @@ -2134,9 +2153,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) } cpu->watchpoint_hit = wp; - /* The tb_lock will be reset when cpu_loop_exit or - * cpu_loop_exit_noexc longjmp back into the cpu_exec - * main loop. + /* Both tb_lock and iothread_mutex will be reset when + * cpu_loop_exit or cpu_loop_exit_noexc longjmp + * back into the cpu_exec main loop. 
*/ tb_lock(); tb_check_watchpoint(cpu); @@ -2371,8 +2390,14 @@ static void io_mem_init(void) memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); + + /* io_mem_notdirty calls tb_invalidate_phys_page_fast, + * which can be called without the iothread mutex. + */ memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, NULL, UINT64_MAX); + memory_region_clear_global_locking(&io_mem_notdirty); + memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, NULL, UINT64_MAX); } @@ -3288,4 +3313,68 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) rcu_read_unlock(); return ret; } + +/* + * Unmap pages of memory from start to start+length such that + * they a) read as 0, b) Trigger whatever fault mechanism + * the OS provides for postcopy. + * The pages must be unmapped by the end of the function. + * Returns: 0 on success, none-0 on failure + * + */ +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) +{ + int ret = -1; + + uint8_t *host_startaddr = rb->host + start; + + if ((uintptr_t)host_startaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned start address: %p", + host_startaddr); + goto err; + } + + if ((start + length) <= rb->used_length) { + uint8_t *host_endaddr = host_startaddr + length; + if ((uintptr_t)host_endaddr & (rb->page_size - 1)) { + error_report("ram_block_discard_range: Unaligned end address: %p", + host_endaddr); + goto err; + } + + errno = ENOTSUP; /* If we are missing MADVISE etc */ + + if (rb->page_size == qemu_host_page_size) { +#if defined(CONFIG_MADVISE) + /* Note: We need the madvise MADV_DONTNEED behaviour of definitely + * freeing the page. + */ + ret = madvise(host_startaddr, length, MADV_DONTNEED); +#endif + } else { + /* Huge page case - unfortunately it can't do DONTNEED, but + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the + * huge page file. + */ +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); +#endif + } + if (ret) { + ret = -errno; + error_report("ram_block_discard_range: Failed to discard range " + "%s:%" PRIx64 " +%zx (%d)", + rb->idstr, start, length, ret); + } + } else { + error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64 + "/%zx/" RAM_ADDR_FMT")", + rb->idstr, start, length, rb->used_length); + } + +err: + return ret; +} + #endif diff --git a/fpu/softfloat.c b/fpu/softfloat.c index c295f3183f..7af14e29aa 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -623,6 +623,9 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, case float_round_down: roundIncrement = zSign ? 0x3ff : 0; break; + case float_round_to_odd: + roundIncrement = (zSig & 0x400) ? 
0 : 0x3ff; + break; default: abort(); } @@ -632,8 +635,10 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, || ( ( zExp == 0x7FD ) && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) ) { + bool overflow_to_inf = roundingMode != float_round_to_odd && + roundIncrement != 0; float_raise(float_flag_overflow | float_flag_inexact, status); - return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); + return packFloat64(zSign, 0x7FF, -(!overflow_to_inf)); } if ( zExp < 0 ) { if (status->flush_to_zero) { @@ -651,6 +656,13 @@ static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig, if (isTiny && roundBits) { float_raise(float_flag_underflow, status); } + if (roundingMode == float_round_to_odd) { + /* + * For round-to-odd case, the roundIncrement depends on + * zSig which just changed. + */ + roundIncrement = (zSig & 0x400) ? 0 : 0x3ff; + } } } if (roundBits) { @@ -1149,6 +1161,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, case float_round_down: increment = zSign && zSig2; break; + case float_round_to_odd: + increment = !(zSig1 & 0x1) && zSig2; + break; default: abort(); } @@ -1168,6 +1183,7 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, if ( ( roundingMode == float_round_to_zero ) || ( zSign && ( roundingMode == float_round_up ) ) || ( ! zSign && ( roundingMode == float_round_down ) ) + || (roundingMode == float_round_to_odd) ) { return packFloat128( @@ -1215,6 +1231,9 @@ static float128 roundAndPackFloat128(flag zSign, int32_t zExp, case float_round_down: increment = zSign && zSig2; break; + case float_round_to_odd: + increment = !(zSig1 & 0x1) && zSig2; + break; default: abort(); } @@ -6109,6 +6128,93 @@ int64_t float128_to_int64_round_to_zero(float128 a, float_status *status) } /*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. If the conversion overflows, the +| largest unsigned integer is returned. If 'a' is negative, the value is +| rounded and zero is returned; negative values that do not round to zero +| will raise the inexact exception. 
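float_round_to_odd rounds an inexact result to whichever of the two neighbouring representable values has an odd last digit, which is why the new cases above effectively round up only when the retained low bit is still 0 and some discarded ("sticky") bits are set. A self-contained integer sketch of the rule, separate from the softfloat plumbing:

    #include <stdint.h>

    /* Drop the low 'shift' bits (0 < shift < 64) with round-to-odd:
     * if anything non-zero was discarded, force the result's LSB to 1. */
    static uint64_t shift_right_round_to_odd(uint64_t sig, unsigned shift)
    {
        uint64_t discarded = sig & ((UINT64_C(1) << shift) - 1);

        return (sig >> shift) | (discarded != 0);
    }

Because the low bit now records whether precision was lost, a later rounding of the result to a narrower format gives the same answer as rounding the original value directly (provided the intermediate format is wide enough), which is the usual reason for asking for this mode in two-step conversions.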
+*----------------------------------------------------------------------------*/ + +uint64_t float128_to_uint64(float128 a, float_status *status) +{ + flag aSign; + int aExp; + int shiftCount; + uint64_t aSig0, aSig1; + + aSig0 = extractFloat128Frac0(a); + aSig1 = extractFloat128Frac1(a); + aExp = extractFloat128Exp(a); + aSign = extractFloat128Sign(a); + if (aSign && (aExp > 0x3FFE)) { + float_raise(float_flag_invalid, status); + if (float128_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + if (aExp) { + aSig0 |= LIT64(0x0001000000000000); + } + shiftCount = 0x402F - aExp; + if (shiftCount <= 0) { + if (0x403E < aExp) { + float_raise(float_flag_invalid, status); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1); + } else { + shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1); + } + return roundAndPackUint64(aSign, aSig0, aSig1, status); +} + +uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status) +{ + uint64_t v; + signed char current_rounding_mode = status->float_rounding_mode; + + set_float_rounding_mode(float_round_to_zero, status); + v = float128_to_uint64(a, status); + set_float_rounding_mode(current_rounding_mode, status); + + return v; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the quadruple-precision floating-point +| value `a' to the 32-bit unsigned integer format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic except that the conversion is always rounded toward zero. +| If `a' is a NaN, the largest positive integer is returned. Otherwise, +| if the conversion overflows, the largest unsigned integer is returned. +| If 'a' is negative, the value is rounded and zero is returned; negative +| values that do not round to zero will raise the inexact exception. +*----------------------------------------------------------------------------*/ + +uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status) +{ + uint64_t v; + uint32_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float128_to_uint64_round_to_zero(a, status); + if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid, status); + return res; +} + +/*---------------------------------------------------------------------------- | Returns the result of converting the quadruple-precision floating-point | value `a' to the single-precision floating-point format. 
The conversion | is performed according to the IEC/IEEE Standard for Binary Floating-Point @@ -7386,7 +7492,7 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status) { signed char current_rounding_mode = status->float_rounding_mode; set_float_rounding_mode(float_round_to_zero, status); - int64_t v = float64_to_uint64(a, status); + uint64_t v = float64_to_uint64(a, status); set_float_rounding_mode(current_rounding_mode, status); return v; } diff --git a/fsdev/Makefile.objs b/fsdev/Makefile.objs index 1b120a4a7d..659df6e187 100644 --- a/fsdev/Makefile.objs +++ b/fsdev/Makefile.objs @@ -5,7 +5,7 @@ common-obj-y = qemu-fsdev.o 9p-marshal.o 9p-iov-marshal.o else common-obj-y = qemu-fsdev-dummy.o endif -common-obj-y += qemu-fsdev-opts.o +common-obj-y += qemu-fsdev-opts.o qemu-fsdev-throttle.o # Toplevel always builds this; targets without virtio will put it in # common-obj-y diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index a56dc8488d..0844a403dc 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -17,6 +17,7 @@ #include <dirent.h> #include <utime.h> #include <sys/vfs.h> +#include "qemu-fsdev-throttle.h" #define SM_LOCAL_MODE_BITS 0600 #define SM_LOCAL_DIR_MODE_BITS 0700 @@ -74,6 +75,7 @@ typedef struct FsDriverEntry { char *path; int export_flags; FileOperations *ops; + FsThrottle fst; } FsDriverEntry; typedef struct FsContext @@ -83,6 +85,7 @@ typedef struct FsContext int export_flags; struct xattr_operations **xops; struct extended_ops exops; + FsThrottle *fst; /* fs driver specific data */ void *private; } FsContext; diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index 1dd8c7a24c..bf5713008a 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -9,6 +9,7 @@ #include "qemu/config-file.h" #include "qemu/option.h" #include "qemu/module.h" +#include "qemu/throttle-options.h" static QemuOptsList qemu_fsdev_opts = { .name = "fsdev", @@ -39,6 +40,8 @@ static QemuOptsList qemu_fsdev_opts = { .type = QEMU_OPT_NUMBER, }, + THROTTLE_OPTS, + { /*End of list */ } }, }; diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c new file mode 100644 index 0000000000..7ae4e86646 --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.c @@ -0,0 +1,118 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. 
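The FsThrottle state is embedded in every FsDriverEntry and reachable through FsContext, so a backend's I/O path can account each request against the configured limits before touching the backing file. The real call sites live in the 9p server code and are not part of this hunk; a hypothetical caller would look roughly like this (function name invented for illustration):

    #include "fsdev/file-op-9p.h"   /* FsContext; pulls in qemu-fsdev-throttle.h */

    /* Sketch only: block this coroutine while the read budget is
     * exhausted, then charge the request size to the token bucket.
     * 'false' selects the read direction; a write path would pass 'true'. */
    static void coroutine_fn throttle_before_read(FsContext *ctx,
                                                  struct iovec *iov, int iovcnt)
    {
        if (ctx->fst) {
            fsdev_co_throttle_request(ctx->fst, false, iov, iovcnt);
        }
        /* ...issue the actual preadv() against the backing file here... */
    }

fsdev_co_throttle_request() itself, shown below, queues the coroutine while the bucket is empty and wakes the next waiter once its own bytes have been accounted.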
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu-fsdev-throttle.h" +#include "qemu/iov.h" + +static void fsdev_throttle_read_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[false]); +} + +static void fsdev_throttle_write_timer_cb(void *opaque) +{ + FsThrottle *fst = opaque; + qemu_co_enter_next(&fst->throttled_reqs[true]); +} + +void fsdev_throttle_parse_opts(QemuOpts *opts, FsThrottle *fst, Error **errp) +{ + throttle_config_init(&fst->cfg); + fst->cfg.buckets[THROTTLE_BPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.bps-total", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].avg = + qemu_opt_get_number(opts, "throttling.bps-read", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.bps-write", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].avg = + qemu_opt_get_number(opts, "throttling.iops-total", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].avg = + qemu_opt_get_number(opts, "throttling.iops-read", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].avg = + qemu_opt_get_number(opts, "throttling.iops-write", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.bps-total-max", 0); + fst->cfg.buckets[THROTTLE_BPS_READ].max = + qemu_opt_get_number(opts, "throttling.bps-read-max", 0); + fst->cfg.buckets[THROTTLE_BPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.bps-write-max", 0); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.iops-total-max", 0); + fst->cfg.buckets[THROTTLE_OPS_READ].max = + qemu_opt_get_number(opts, "throttling.iops-read-max", 0); + fst->cfg.buckets[THROTTLE_OPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.iops-write-max", 0); + + fst->cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1); + fst->cfg.buckets[THROTTLE_BPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = + qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_READ].burst_length = + qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1); + fst->cfg.buckets[THROTTLE_OPS_WRITE].burst_length = + qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1); + fst->cfg.op_size = + qemu_opt_get_number(opts, "throttling.iops-size", 0); + + throttle_is_valid(&fst->cfg, errp); +} + +void fsdev_throttle_init(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_init(&fst->ts); + throttle_timers_init(&fst->tt, + qemu_get_aio_context(), + QEMU_CLOCK_REALTIME, + fsdev_throttle_read_timer_cb, + fsdev_throttle_write_timer_cb, + fst); + throttle_config(&fst->ts, &fst->tt, &fst->cfg); + qemu_co_queue_init(&fst->throttled_reqs[0]); + qemu_co_queue_init(&fst->throttled_reqs[1]); + } +} + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write, + struct iovec *iov, int iovcnt) +{ + if (throttle_enabled(&fst->cfg)) { + if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) || + !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) { + qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL); + } + + throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt)); + + if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) && + !throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) 
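On the command line these limits are expected to ride on the -fsdev option using the same throttling.* keys parsed above, for example (values and overall syntax illustrative):

    -fsdev local,id=shared0,path=/srv/export,security_model=mapped-xattr,throttling.iops-total=200,throttling.bps-write=10485760
    -device virtio-9p-pci,fsdev=shared0,mount_tag=share

The *-max bucket sizes default to 0 (no burst) and the *-max-length options to 1, matching the defaults passed to qemu_opt_get_number() above.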
{ + qemu_co_queue_next(&fst->throttled_reqs[is_write]); + } + } +} + +void fsdev_throttle_cleanup(FsThrottle *fst) +{ + if (throttle_enabled(&fst->cfg)) { + throttle_timers_destroy(&fst->tt); + } +} diff --git a/fsdev/qemu-fsdev-throttle.h b/fsdev/qemu-fsdev-throttle.h new file mode 100644 index 0000000000..e418643ccb --- /dev/null +++ b/fsdev/qemu-fsdev-throttle.h @@ -0,0 +1,39 @@ +/* + * Fsdev Throttle + * + * Copyright (C) 2016 Huawei Technologies Duesseldorf GmbH + * + * Author: Pradeep Jagadeesh <pradeep.jagadeesh@huawei.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ + +#ifndef _FSDEV_THROTTLE_H +#define _FSDEV_THROTTLE_H + +#include "block/aio.h" +#include "qemu/main-loop.h" +#include "qemu/coroutine.h" +#include "qapi/error.h" +#include "qemu/throttle.h" + +typedef struct FsThrottle { + ThrottleState ts; + ThrottleTimers tt; + ThrottleConfig cfg; + CoQueue throttled_reqs[2]; +} FsThrottle; + +void fsdev_throttle_parse_opts(QemuOpts *, FsThrottle *, Error **); + +void fsdev_throttle_init(FsThrottle *); + +void coroutine_fn fsdev_co_throttle_request(FsThrottle *, bool , + struct iovec *, int); + +void fsdev_throttle_cleanup(FsThrottle *); +#endif /* _FSDEV_THROTTLE_H */ diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index b0f35e6829..a53f105c52 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -802,6 +802,20 @@ Show information about hotpluggable CPUs ETEXI STEXI +@item info vm-generation-id +@findex vm-generation-id +Show Virtual Machine Generation ID +ETEXI + + { + .name = "vm-generation-id", + .args_type = "", + .params = "", + .help = "Show Virtual Machine Generation ID", + .cmd = hmp_info_vm_generation_id, + }, + +STEXI @end table ETEXI @@ -1014,8 +1014,14 @@ void hmp_memsave(Monitor *mon, const QDict *qdict) const char *filename = qdict_get_str(qdict, "filename"); uint64_t addr = qdict_get_int(qdict, "val"); Error *err = NULL; + int cpu_index = monitor_get_cpu_index(); - qmp_memsave(addr, size, filename, true, monitor_get_cpu_index(), &err); + if (cpu_index < 0) { + monitor_printf(mon, "No CPU available\n"); + return; + } + + qmp_memsave(addr, size, filename, true, cpu_index, &err); hmp_handle_error(mon, &err); } @@ -1338,12 +1344,11 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) { const char *param = qdict_get_str(qdict, "parameter"); const char *valuestr = qdict_get_str(qdict, "value"); - int64_t valuebw = 0; + uint64_t valuebw = 0; long valueint = 0; - char *endp; Error *err = NULL; bool use_int_value = false; - int i; + int i, ret; for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) { if (strcmp(param, MigrationParameter_lookup[i]) == 0) { @@ -1379,9 +1384,9 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) break; case MIGRATION_PARAMETER_MAX_BANDWIDTH: p.has_max_bandwidth = true; - valuebw = qemu_strtosz(valuestr, &endp); - if (valuebw < 0 || (size_t)valuebw != valuebw - || *endp != '\0') { + ret = qemu_strtosz_MiB(valuestr, NULL, &valuebw); + if (ret < 0 || valuebw > INT64_MAX + || (size_t)valuebw != valuebw) { error_setg(&err, "Invalid size %s", valuestr); goto cleanup; } @@ -1552,6 +1557,7 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) { Error *err = NULL; BlockIOThrottle throttle = { + .has_device = true, .device = (char *) qdict_get_str(qdict, "device"), .bps = qdict_get_int(qdict, "bps"), .bps_rd = qdict_get_int(qdict, "bps_rd"), @@ -2039,13 
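In hmp_migrate_set_parameter() the bandwidth value now goes through qemu_strtosz_MiB(), which understands size suffixes and defaults to mebibytes, instead of the old qemu_strtosz()/end-pointer dance. A minimal sketch of the same checks (helper name invented; qemu/osdep.h assumed to be included first, as everywhere in the tree):

    #include "qemu/cutils.h"   /* qemu_strtosz_MiB() */
    #include "qapi/error.h"

    static int parse_bandwidth(const char *str, uint64_t *out, Error **errp)
    {
        uint64_t val;

        /* qemu_strtosz_MiB() returns a negative value on failure,
         * as checked in the monitor code above. */
        if (qemu_strtosz_MiB(str, NULL, &val) < 0 || val > INT64_MAX) {
            error_setg(errp, "Invalid size %s", str);
            return -1;
        }
        *out = val;
        return 0;
    }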
+2045,17 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; + int ret; blk = blk_by_name(device); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(); - blk_insert_bs(blk, bs); + blk = local_blk = blk_new(0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); + if (ret < 0) { + goto fail; + } } else { goto fail; } @@ -2054,6 +2064,31 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) aio_context = blk_get_aio_context(blk); aio_context_acquire(aio_context); + /* + * Notably absent: Proper permission management. This is sad, but it seems + * almost impossible to achieve without changing the semantics and thereby + * limiting the use cases of the qemu-io HMP command. + * + * In an ideal world we would unconditionally create a new BlockBackend for + * qemuio_command(), but we have commands like 'reopen' and want them to + * take effect on the exact BlockBackend whose name the user passed instead + * of just on a temporary copy of it. + * + * Another problem is that deleting the temporary BlockBackend involves + * draining all requests on it first, but some qemu-iotests cases want to + * issue multiple aio_read/write requests and expect them to complete in + * the background while the monitor has already returned. + * + * This is also what prevents us from saving the original permissions and + * restoring them later: We can't revoke permissions until all requests + * have completed, and we don't know when that is nor can we really let + * anything else run before we have revoken them to avoid race conditions. + * + * What happens now is that command() in qemu-io-cmds.c can extend the + * permissions if necessary for the qemu-io command. And they simply stay + * extended, possibly resulting in a read-only guest device keeping write + * permissions. Ugly, but it appears to be the lesser evil. 
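blk_new() now takes two permission masks, the permissions the backend needs for itself and those it is willing to share with others, and blk_insert_bs() can fail, so its Error return has to be checked. A sketch of the general pattern this hunk follows (the HMP code above keeps the reference in local_blk and drops it later; the cleanup here is only for completeness):

    #include "sysemu/block-backend.h"

    /* Attach a scratch BlockBackend to an existing node, requesting no
     * permissions of our own and allowing everything to other users. */
    static BlockBackend *attach_scratch_blk(BlockDriverState *bs, Error **errp)
    {
        BlockBackend *blk = blk_new(0, BLK_PERM_ALL);   /* perm, shared_perm */

        if (blk_insert_bs(blk, bs, errp) < 0) {
            blk_unref(blk);
            return NULL;
        }
        return blk;
    }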
+ */ qemuio_command(blk, command); aio_context_release(aio_context); @@ -2148,10 +2183,15 @@ void hmp_info_iothreads(Monitor *mon, const QDict *qdict) { IOThreadInfoList *info_list = qmp_query_iothreads(NULL); IOThreadInfoList *info; + IOThreadInfo *value; for (info = info_list; info; info = info->next) { - monitor_printf(mon, "%s: thread_id=%" PRId64 "\n", - info->value->id, info->value->thread_id); + value = info->value; + monitor_printf(mon, "%s:\n", value->id); + monitor_printf(mon, " thread_id=%" PRId64 "\n", value->thread_id); + monitor_printf(mon, " poll-max-ns=%" PRId64 "\n", value->poll_max_ns); + monitor_printf(mon, " poll-grow=%" PRId64 "\n", value->poll_grow); + monitor_printf(mon, " poll-shrink=%" PRId64 "\n", value->poll_shrink); } qapi_free_IOThreadInfoList(info_list); @@ -2565,3 +2605,12 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) qapi_free_HotpluggableCPUList(saved); } + +void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict) +{ + GuidInfo *info = qmp_query_vm_generation_id(NULL); + if (info) { + monitor_printf(mon, "%s\n", info->guid); + } + qapi_free_GuidInfo(info); +} @@ -137,5 +137,6 @@ void hmp_rocker_of_dpa_flows(Monitor *mon, const QDict *qdict); void hmp_rocker_of_dpa_groups(Monitor *mon, const QDict *qdict); void hmp_info_dump(Monitor *mon, const QDict *qdict); void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict); +void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict); #endif diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 7de07e1ba6..f22a3c3654 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -13,7 +13,9 @@ #include "qemu/osdep.h" #include "9p.h" +#include "9p-local.h" #include "9p-xattr.h" +#include "9p-util.h" #include "fsdev/qemu-fsdev.h" /* local_ops */ #include <arpa/inet.h> #include <pwd.h> @@ -43,40 +45,62 @@ #define BTRFS_SUPER_MAGIC 0x9123683E #endif -#define VIRTFS_META_DIR ".virtfs_metadata" +typedef struct { + int mountfd; +} LocalData; -static char *local_mapped_attr_path(FsContext *ctx, const char *path) +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode) { - int dirlen; - const char *name = strrchr(path, '/'); - if (name) { - dirlen = name - path; - ++name; - } else { - name = path; - dirlen = 0; + LocalData *data = fs_ctx->private; + + /* All paths are relative to the path data->mountfd points to */ + while (*path == '/') { + path++; } - return g_strdup_printf("%s/%.*s/%s/%s", ctx->fs_root, - dirlen, path, VIRTFS_META_DIR, name); + + return relative_openat_nofollow(data->mountfd, path, flags, mode); +} + +int local_opendir_nofollow(FsContext *fs_ctx, const char *path) +{ + return local_open_nofollow(fs_ctx, path, O_DIRECTORY | O_RDONLY, 0); +} + +static void renameat_preserve_errno(int odirfd, const char *opath, int ndirfd, + const char *npath) +{ + int serrno = errno; + renameat(odirfd, opath, ndirfd, npath); + errno = serrno; } -static FILE *local_fopen(const char *path, const char *mode) +static void unlinkat_preserve_errno(int dirfd, const char *path, int flags) +{ + int serrno = errno; + unlinkat(dirfd, path, flags); + errno = serrno; +} + +#define VIRTFS_META_DIR ".virtfs_metadata" + +static FILE *local_fopenat(int dirfd, const char *name, const char *mode) { int fd, o_mode = 0; FILE *fp; - int flags = O_NOFOLLOW; + int flags; /* * only supports two modes */ if (mode[0] == 'r') { - flags |= O_RDONLY; + flags = O_RDONLY; } else if (mode[0] == 'w') { - flags |= O_WRONLY | O_TRUNC | O_CREAT; + flags = O_WRONLY | O_TRUNC | O_CREAT; o_mode = 
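With the extra polling parameters, "info iothreads" now prints one block per iothread instead of a single line. Example output, with illustrative values and the field layout taken from the monitor_printf() calls above:

    (qemu) info iothreads
    iothread0:
      thread_id=12034
      poll-max-ns=32768
      poll-grow=0
      poll-shrink=0

The companion "info vm-generation-id" command simply prints the GUID string returned by query-vm-generation-id.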
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; } else { return NULL; } - fd = open(path, flags, o_mode); + fd = openat_file(dirfd, name, flags, o_mode); if (fd == -1) { return NULL; } @@ -88,16 +112,20 @@ static FILE *local_fopen(const char *path, const char *mode) } #define ATTR_MAX 100 -static void local_mapped_file_attr(FsContext *ctx, const char *path, +static void local_mapped_file_attr(int dirfd, const char *name, struct stat *stbuf) { FILE *fp; char buf[ATTR_MAX]; - char *attr_path; + int map_dirfd; - attr_path = local_mapped_attr_path(ctx, path); - fp = local_fopen(attr_path, "r"); - g_free(attr_path); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return; + } + + fp = local_fopenat(map_dirfd, name, "r"); + close_preserve_errno(map_dirfd); if (!fp) { return; } @@ -119,12 +147,17 @@ static void local_mapped_file_attr(FsContext *ctx, const char *path, static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { - int err; - char *buffer; - char *path = fs_path->data; + int err = -1; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; - buffer = rpath(fs_ctx, path); - err = lstat(buffer, stbuf); + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + err = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW); if (err) { goto err_out; } @@ -134,87 +167,83 @@ static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) gid_t tmp_gid; mode_t tmp_mode; dev_t tmp_dev; - if (getxattr(buffer, "user.virtfs.uid", &tmp_uid, sizeof(uid_t)) > 0) { + + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t)) > 0) { stbuf->st_uid = le32_to_cpu(tmp_uid); } - if (getxattr(buffer, "user.virtfs.gid", &tmp_gid, sizeof(gid_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t)) > 0) { stbuf->st_gid = le32_to_cpu(tmp_gid); } - if (getxattr(buffer, "user.virtfs.mode", - &tmp_mode, sizeof(mode_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t)) > 0) { stbuf->st_mode = le32_to_cpu(tmp_mode); } - if (getxattr(buffer, "user.virtfs.rdev", &tmp_dev, sizeof(dev_t)) > 0) { + if (fgetxattrat_nofollow(dirfd, name, "user.virtfs.rdev", &tmp_dev, + sizeof(dev_t)) > 0) { stbuf->st_rdev = le64_to_cpu(tmp_dev); } } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - local_mapped_file_attr(fs_ctx, path, stbuf); + local_mapped_file_attr(dirfd, name, stbuf); } err_out: - g_free(buffer); - return err; -} - -static int local_create_mapped_attr_dir(FsContext *ctx, const char *path) -{ - int err; - char *attr_dir; - char *tmp_path = g_strdup(path); - - attr_dir = g_strdup_printf("%s/%s/%s", - ctx->fs_root, dirname(tmp_path), VIRTFS_META_DIR); - - err = mkdir(attr_dir, 0700); - if (err < 0 && errno == EEXIST) { - err = 0; - } - g_free(attr_dir); - g_free(tmp_path); + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } -static int local_set_mapped_file_attr(FsContext *ctx, - const char *path, FsCred *credp) +static int local_set_mapped_file_attrat(int dirfd, const char *name, + FsCred *credp) { FILE *fp; - int ret = 0; + int ret; char buf[ATTR_MAX]; - char *attr_path; int uid = -1, gid = -1, mode = -1, rdev = -1; + int map_dirfd; + + ret = mkdirat(dirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + return -1; + } - attr_path = local_mapped_attr_path(ctx, path); - fp = 
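local_lstat() above shows the shape that almost every converted callback now shares: split the path into parent directory and final component, open the parent with local_opendir_nofollow(), and then use an *at() syscall on the last component with AT_SYMLINK_NOFOLLOW, so a symlink planted inside the export can no longer redirect the operation elsewhere. Restated as a stand-alone sketch (QEMU-internal headers 9p-local.h and 9p-util.h assumed):

    /* Same pattern as local_lstat(), distilled for illustration. */
    static int getattr_nofollow(FsContext *ctx, const char *path,
                                struct stat *stbuf)
    {
        char *dirpath = g_path_get_dirname(path);
        char *name = g_path_get_basename(path);
        int dirfd, ret = -1;

        dirfd = local_opendir_nofollow(ctx, dirpath);
        if (dirfd != -1) {
            ret = fstatat(dirfd, name, stbuf, AT_SYMLINK_NOFOLLOW);
            close_preserve_errno(dirfd);
        }
        g_free(name);
        g_free(dirpath);
        return ret;
    }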
local_fopen(attr_path, "r"); + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + if (map_dirfd == -1) { + return -1; + } + + fp = local_fopenat(map_dirfd, name, "r"); if (!fp) { - goto create_map_file; + if (errno == ENOENT) { + goto update_map_file; + } else { + close_preserve_errno(map_dirfd); + return -1; + } } memset(buf, 0, ATTR_MAX); while (fgets(buf, ATTR_MAX, fp)) { if (!strncmp(buf, "virtfs.uid", 10)) { - uid = atoi(buf+11); + uid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.gid", 10)) { - gid = atoi(buf+11); + gid = atoi(buf + 11); } else if (!strncmp(buf, "virtfs.mode", 11)) { - mode = atoi(buf+12); + mode = atoi(buf + 12); } else if (!strncmp(buf, "virtfs.rdev", 11)) { - rdev = atoi(buf+12); + rdev = atoi(buf + 12); } memset(buf, 0, ATTR_MAX); } fclose(fp); - goto update_map_file; - -create_map_file: - ret = local_create_mapped_attr_dir(ctx, path); - if (ret < 0) { - goto err_out; - } update_map_file: - fp = local_fopen(attr_path, "w"); + fp = local_fopenat(map_dirfd, name, "w"); + close_preserve_errno(map_dirfd); if (!fp) { - ret = -1; - goto err_out; + return -1; } if (credp->fc_uid != -1) { @@ -230,7 +259,6 @@ update_map_file: rdev = credp->fc_rdev; } - if (uid != -1) { fprintf(fp, "virtfs.uid=%d\n", uid); } @@ -245,39 +273,71 @@ update_map_file: } fclose(fp); -err_out: - g_free(attr_path); + return 0; +} + +static int fchmodat_nofollow(int dirfd, const char *name, mode_t mode) +{ + int fd, ret; + + /* FIXME: this should be handled with fchmodat(AT_SYMLINK_NOFOLLOW). + * Unfortunately, the linux kernel doesn't implement it yet. As an + * alternative, let's open the file and use fchmod() instead. This + * may fail depending on the permissions of the file, but it is the + * best we can do to avoid TOCTTOU. We first try to open read-only + * in case name points to a directory. If that fails, we try write-only + * in case name doesn't point to a directory. + */ + fd = openat_file(dirfd, name, O_RDONLY, 0); + if (fd == -1) { + /* In case the file is writable-only and isn't a directory. 
*/ + if (errno == EACCES) { + fd = openat_file(dirfd, name, O_WRONLY, 0); + } + if (fd == -1 && errno == EISDIR) { + errno = EACCES; + } + } + if (fd == -1) { + return -1; + } + ret = fchmod(fd, mode); + close_preserve_errno(fd); return ret; } -static int local_set_xattr(const char *path, FsCred *credp) +static int local_set_xattrat(int dirfd, const char *path, FsCred *credp) { int err; if (credp->fc_uid != -1) { uint32_t tmp_uid = cpu_to_le32(credp->fc_uid); - err = setxattr(path, "user.virtfs.uid", &tmp_uid, sizeof(uid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.uid", &tmp_uid, + sizeof(uid_t), 0); if (err) { return err; } } if (credp->fc_gid != -1) { uint32_t tmp_gid = cpu_to_le32(credp->fc_gid); - err = setxattr(path, "user.virtfs.gid", &tmp_gid, sizeof(gid_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.gid", &tmp_gid, + sizeof(gid_t), 0); if (err) { return err; } } if (credp->fc_mode != -1) { uint32_t tmp_mode = cpu_to_le32(credp->fc_mode); - err = setxattr(path, "user.virtfs.mode", &tmp_mode, sizeof(mode_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.mode", &tmp_mode, + sizeof(mode_t), 0); if (err) { return err; } } if (credp->fc_rdev != -1) { uint64_t tmp_rdev = cpu_to_le64(credp->fc_rdev); - err = setxattr(path, "user.virtfs.rdev", &tmp_rdev, sizeof(dev_t), 0); + err = fsetxattrat_nofollow(dirfd, path, "user.virtfs.rdev", &tmp_rdev, + sizeof(dev_t), 0); if (err) { return err; } @@ -285,58 +345,56 @@ static int local_set_xattr(const char *path, FsCred *credp) return 0; } -static int local_post_create_passthrough(FsContext *fs_ctx, const char *path, - FsCred *credp) +static int local_set_cred_passthrough(FsContext *fs_ctx, int dirfd, + const char *name, FsCred *credp) { - char *buffer; - - buffer = rpath(fs_ctx, path); - if (lchown(buffer, credp->fc_uid, credp->fc_gid) < 0) { + if (fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) < 0) { /* * If we fail to change ownership and if we are * using security model none. 
Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - goto err; + return -1; } } - if (chmod(buffer, credp->fc_mode & 07777) < 0) { - goto err; - } - - g_free(buffer); - return 0; -err: - g_free(buffer); - return -1; + return fchmodat_nofollow(dirfd, name, credp->fc_mode & 07777); } static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { ssize_t tsize = -1; - char *buffer; - char *path = fs_path->data; if ((fs_ctx->export_flags & V9FS_SM_MAPPED) || (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE)) { int fd; - buffer = rpath(fs_ctx, path); - fd = open(buffer, O_RDONLY | O_NOFOLLOW); - g_free(buffer); + + fd = local_open_nofollow(fs_ctx, fs_path->data, O_RDONLY, 0); if (fd == -1) { return -1; } do { tsize = read(fd, (void *)buf, bufsz); } while (tsize == -1 && errno == EINTR); - close(fd); + close_preserve_errno(fd); } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - tsize = readlink(buffer, buf, bufsz); - g_free(buffer); + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + tsize = readlinkat(dirfd, name, buf, bufsz); + close_preserve_errno(dirfd); + out: + g_free(name); + g_free(dirpath); } return tsize; } @@ -354,27 +412,32 @@ static int local_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int local_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int fd; - buffer = rpath(ctx, path); - fs->fd = open(buffer, flags | O_NOFOLLOW); - g_free(buffer); + fd = local_open_nofollow(ctx, fs_path->data, flags, 0); + if (fd == -1) { + return -1; + } + fs->fd = fd; return fs->fd; } static int local_opendir(FsContext *ctx, V9fsPath *fs_path, V9fsFidOpenState *fs) { - char *buffer; - char *path = fs_path->data; + int dirfd; + DIR *stream; + + dirfd = local_opendir_nofollow(ctx, fs_path->data); + if (dirfd == -1) { + return -1; + } - buffer = rpath(ctx, path); - fs->dir.stream = opendir(buffer); - g_free(buffer); - if (!fs->dir.stream) { + stream = fdopendir(dirfd); + if (!stream) { return -1; } + fs->dir.stream = stream; return 0; } @@ -463,145 +526,122 @@ static ssize_t local_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int local_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = chmod(buffer, credp->fc_mode); - g_free(buffer); + ret = local_set_mapped_file_attrat(dirfd, name, credp); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + ret = fchmodat_nofollow(dirfd, name, credp->fc_mode); } + close_preserve_errno(dirfd); + +out: + g_free(dirpath); + g_free(name); 
return ret; } static int local_mknod(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mknodat(dirfd, name, SM_LOCAL_MODE_BITS | S_IFREG, 0); if (err == -1) { goto out; } - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, SM_LOCAL_MODE_BITS|S_IFREG, 0); - if (err == -1) { - goto out; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mknod(buffer, credp->fc_mode, credp->fc_rdev); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mknodat(dirfd, name, credp->fc_mode, credp->fc_rdev); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, FsCred *credp) { - char *path; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } - /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + err = mkdirat(dirfd, name, SM_LOCAL_DIR_MODE_BITS); if (err == -1) { goto out; } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, SM_LOCAL_DIR_MODE_BITS); - if (err == -1) { - goto out; + credp->fc_mode = credp->fc_mode | S_IFDIR; + + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - credp->fc_mode = credp->fc_mode|S_IFDIR; - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if 
((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - err = mkdir(buffer, credp->fc_mode); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = mkdirat(dirfd, name, credp->fc_mode); if (err == -1) { goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, AT_REMOVEDIR); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -649,62 +689,45 @@ static int local_fstat(FsContext *fs_ctx, int fid_type, static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, int flags, FsCred *credp, V9fsFidOpenState *fs) { - char *path; int fd = -1; int err = -1; - int serrno = 0; - V9fsString fullname; - char *buffer = NULL; + int dirfd; /* * Mark all the open to not follow symlinks */ flags |= O_NOFOLLOW; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - path = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { + fd = openat_file(dirfd, name, flags, SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set cleint credentials in xattr */ - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + /* Set cleint credentials in xattr */ + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - credp->fc_mode = credp->fc_mode|S_IFREG; - /* Set client credentials in .virtfs_metadata directory files */ - err = local_set_mapped_file_attr(fs_ctx, path, credp); if (err == -1) { - serrno = errno; goto err_end; } } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - fd = open(buffer, flags, credp->fc_mode); + fd = openat_file(dirfd, name, flags, credp->fc_mode); if (fd == -1) { - err = fd; goto out; } - err = local_post_create_passthrough(fs_ctx, path, credp); + err = local_set_cred_passthrough(fs_ctx, dirfd, name, credp); if (err == -1) { - serrno = errno; goto err_end; } } @@ -713,12 +736,11 @@ static int local_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, goto out; err_end: - close(fd); - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, + flags & O_DIRECTORY ? 
AT_REMOVEDIR : 0); + close_preserve_errno(fd); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } @@ -727,23 +749,22 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, V9fsPath *dir_path, const char *name, FsCred *credp) { int err = -1; - int serrno = 0; - char *newpath; - V9fsString fullname; - char *buffer = NULL; + int dirfd; - v9fs_string_init(&fullname); - v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - newpath = fullname.data; + dirfd = local_opendir_nofollow(fs_ctx, dir_path->data); + if (dirfd == -1) { + return -1; + } /* Determine the security model */ - if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + if (fs_ctx->export_flags & V9FS_SM_MAPPED || + fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { int fd; ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); + + fd = openat_file(dirfd, name, O_CREAT | O_EXCL | O_RDWR, + SM_LOCAL_MODE_BITS); if (fd == -1) { - err = fd; goto out; } /* Write the oldpath (target) to the file. */ @@ -751,218 +772,204 @@ static int local_symlink(FsContext *fs_ctx, const char *oldpath, do { write_size = write(fd, (void *)oldpath, oldpath_size); } while (write_size == -1 && errno == EINTR); + close_preserve_errno(fd); if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; goto err_end; } - close(fd); /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_xattr(buffer, credp); - if (err == -1) { - serrno = errno; - goto err_end; - } - } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - int fd; - ssize_t oldpath_size, write_size; - buffer = rpath(fs_ctx, newpath); - fd = open(buffer, O_CREAT|O_EXCL|O_RDWR|O_NOFOLLOW, SM_LOCAL_MODE_BITS); - if (fd == -1) { - err = fd; - goto out; - } - /* Write the oldpath (target) to the file. */ - oldpath_size = strlen(oldpath); - do { - write_size = write(fd, (void *)oldpath, oldpath_size); - } while (write_size == -1 && errno == EINTR); + credp->fc_mode = credp->fc_mode | S_IFLNK; - if (write_size != oldpath_size) { - serrno = errno; - close(fd); - err = -1; - goto err_end; + if (fs_ctx->export_flags & V9FS_SM_MAPPED) { + err = local_set_xattrat(dirfd, name, credp); + } else { + err = local_set_mapped_file_attrat(dirfd, name, credp); } - close(fd); - /* Set cleint credentials in symlink's xattr */ - credp->fc_mode = credp->fc_mode|S_IFLNK; - err = local_set_mapped_file_attr(fs_ctx, newpath, credp); if (err == -1) { - serrno = errno; goto err_end; } - } else if ((fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || - (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, newpath); - err = symlink(oldpath, buffer); + } else if (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH || + fs_ctx->export_flags & V9FS_SM_NONE) { + err = symlinkat(oldpath, dirfd, name); if (err) { goto out; } - err = lchown(buffer, credp->fc_uid, credp->fc_gid); + err = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); if (err == -1) { /* * If we fail to change ownership and if we are * using security model none. 
Ignore the error */ if ((fs_ctx->export_flags & V9FS_SEC_MASK) != V9FS_SM_NONE) { - serrno = errno; goto err_end; - } else + } else { err = 0; + } } } goto out; err_end: - remove(buffer); - errno = serrno; + unlinkat_preserve_errno(dirfd, name, 0); out: - g_free(buffer); - v9fs_string_free(&fullname); + close_preserve_errno(dirfd); return err; } static int local_link(FsContext *ctx, V9fsPath *oldpath, V9fsPath *dirpath, const char *name) { - int ret; - V9fsString newpath; - char *buffer, *buffer1; + char *odirpath = g_path_get_dirname(oldpath->data); + char *oname = g_path_get_basename(oldpath->data); + int ret = -1; + int odirfd, ndirfd; - v9fs_string_init(&newpath); - v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); + odirfd = local_opendir_nofollow(ctx, odirpath); + if (odirfd == -1) { + goto out; + } + + ndirfd = local_opendir_nofollow(ctx, dirpath->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + goto out; + } - buffer = rpath(ctx, oldpath->data); - buffer1 = rpath(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + ret = linkat(odirfd, oname, ndirfd, name, 0); + if (ret < 0) { + goto out_close; + } /* now link the virtfs_metadata files */ - if (!ret && (ctx->export_flags & V9FS_SM_MAPPED_FILE)) { - /* Link the .virtfs_metadata files. Create the metada directory */ - ret = local_create_mapped_attr_dir(ctx, newpath.data); - if (ret < 0) { - goto err_out; + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; + + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_link; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; } - buffer = local_mapped_attr_path(ctx, oldpath->data); - buffer1 = local_mapped_attr_path(ctx, newpath.data); - ret = link(buffer, buffer1); - g_free(buffer); - g_free(buffer1); + + ret = linkat(omap_dirfd, oname, nmap_dirfd, name, 0); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); if (ret < 0 && errno != ENOENT) { - goto err_out; + goto err_undo_link; } - } -err_out: - v9fs_string_free(&newpath); - return ret; -} -static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) -{ - char *buffer; - int ret; - char *path = fs_path->data; + ret = 0; + } + goto out_close; - buffer = rpath(ctx, path); - ret = truncate(buffer, size); - g_free(buffer); +err: + ret = -1; +err_undo_link: + unlinkat_preserve_errno(ndirfd, name, 0); +out_close: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); +out: + g_free(oname); + g_free(odirpath); return ret; } -static int local_rename(FsContext *ctx, const char *oldpath, - const char *newpath) +static int local_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { - int err; - char *buffer, *buffer1; + int fd, ret; - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - err = local_create_mapped_attr_dir(ctx, newpath); - if (err < 0) { - return err; - } - /* rename the .virtfs_metadata files */ - buffer = local_mapped_attr_path(ctx, oldpath); - buffer1 = local_mapped_attr_path(ctx, newpath); - err = rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - if (err < 0 && errno != ENOENT) { - return err; - } + fd = local_open_nofollow(ctx, fs_path->data, O_WRONLY, 0); + if (fd == -1) { + return -1; } - - buffer = rpath(ctx, oldpath); - buffer1 = rpath(ctx, newpath); - err = 
rename(buffer, buffer1); - g_free(buffer); - g_free(buffer1); - return err; + ret = ftruncate(fd, size); + close_preserve_errno(fd); + return ret; } static int local_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { - char *buffer; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); int ret = -1; - char *path = fs_path->data; + int dirfd; + + dirfd = local_opendir_nofollow(fs_ctx, dirpath); + if (dirfd == -1) { + goto out; + } if ((credp->fc_uid == -1 && credp->fc_gid == -1) || (fs_ctx->export_flags & V9FS_SM_PASSTHROUGH) || (fs_ctx->export_flags & V9FS_SM_NONE)) { - buffer = rpath(fs_ctx, path); - ret = lchown(buffer, credp->fc_uid, credp->fc_gid); - g_free(buffer); + ret = fchownat(dirfd, name, credp->fc_uid, credp->fc_gid, + AT_SYMLINK_NOFOLLOW); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED) { - buffer = rpath(fs_ctx, path); - ret = local_set_xattr(buffer, credp); - g_free(buffer); + ret = local_set_xattrat(dirfd, name, credp); } else if (fs_ctx->export_flags & V9FS_SM_MAPPED_FILE) { - return local_set_mapped_file_attr(fs_ctx, path, credp); + ret = local_set_mapped_file_attrat(dirfd, name, credp); } + + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return ret; } static int local_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { - char *buffer; - int ret; - char *path = fs_path->data; + char *dirpath = g_path_get_dirname(fs_path->data); + char *name = g_path_get_basename(fs_path->data); + int dirfd, ret = -1; + + dirfd = local_opendir_nofollow(s, dirpath); + if (dirfd == -1) { + goto out; + } - buffer = rpath(s, path); - ret = qemu_utimens(buffer, buf); - g_free(buffer); + ret = utimensat(dirfd, name, buf, AT_SYMLINK_NOFOLLOW); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(name); return ret; } -static int local_remove(FsContext *ctx, const char *path) +static int local_unlinkat_common(FsContext *ctx, int dirfd, const char *name, + int flags) { - int err; - struct stat stbuf; - char *buffer; + int ret = -1; if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - buffer = rpath(ctx, path); - err = lstat(buffer, &stbuf); - g_free(buffer); - if (err) { - goto err_out; - } - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - if (S_ISDIR(stbuf.st_mode)) { - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - path, VIRTFS_META_DIR); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + int map_dirfd; + + if (flags == AT_REMOVEDIR) { + int fd; + + fd = openat(dirfd, name, O_RDONLY | O_DIRECTORY | O_PATH); + if (fd == -1) { + goto err_out; + } + /* + * If directory remove .virtfs_metadata contained in the + * directory + */ + ret = unlinkat(fd, VIRTFS_META_DIR, AT_REMOVEDIR); + close_preserve_errno(fd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. @@ -972,12 +979,12 @@ static int local_remove(FsContext *ctx, const char *path) } /* * Now remove the name from parent directory - * .virtfs_metadata directory + * .virtfs_metadata directory. */ - buffer = local_mapped_attr_path(ctx, path); - err = remove(buffer); - g_free(buffer); - if (err < 0 && errno != ENOENT) { + map_dirfd = openat_dir(dirfd, VIRTFS_META_DIR); + ret = unlinkat(map_dirfd, name, 0); + close_preserve_errno(map_dirfd); + if (ret < 0 && errno != ENOENT) { /* * We didn't had the .virtfs_metadata file. May be file created * in non-mapped mode ?. Ignore ENOENT. 
@@ -986,10 +993,39 @@ static int local_remove(FsContext *ctx, const char *path) } } - buffer = rpath(ctx, path); - err = remove(buffer); - g_free(buffer); + ret = unlinkat(dirfd, name, flags); +err_out: + return ret; +} + +static int local_remove(FsContext *ctx, const char *path) +{ + struct stat stbuf; + char *dirpath = g_path_get_dirname(path); + char *name = g_path_get_basename(path); + int flags = 0; + int dirfd; + int err = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd) { + goto out; + } + + if (fstatat(dirfd, path, &stbuf, AT_SYMLINK_NOFOLLOW) < 0) { + goto err_out; + } + + if (S_ISDIR(stbuf.st_mode)) { + flags |= AT_REMOVEDIR; + } + + err = local_unlinkat_common(ctx, dirfd, name, flags); err_out: + close_preserve_errno(dirfd); +out: + g_free(name); + g_free(dirpath); return err; } @@ -1013,13 +1049,11 @@ static int local_fsync(FsContext *ctx, int fid_type, static int local_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { - char *buffer; - int ret; - char *path = fs_path->data; + int fd, ret; - buffer = rpath(s, path); - ret = statfs(buffer, stbuf); - g_free(buffer); + fd = local_open_nofollow(s, fs_path->data, O_RDONLY, 0); + ret = fstatfs(fd, stbuf); + close_preserve_errno(fd); return ret; } @@ -1071,70 +1105,105 @@ static int local_renameat(FsContext *ctx, V9fsPath *olddir, const char *new_name) { int ret; - V9fsString old_full_name, new_full_name; + int odirfd, ndirfd; + + odirfd = local_opendir_nofollow(ctx, olddir->data); + if (odirfd == -1) { + return -1; + } + + ndirfd = local_opendir_nofollow(ctx, newdir->data); + if (ndirfd == -1) { + close_preserve_errno(odirfd); + return -1; + } + + ret = renameat(odirfd, old_name, ndirfd, new_name); + if (ret < 0) { + goto out; + } + + if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { + int omap_dirfd, nmap_dirfd; - v9fs_string_init(&old_full_name); - v9fs_string_init(&new_full_name); + ret = mkdirat(ndirfd, VIRTFS_META_DIR, 0700); + if (ret < 0 && errno != EEXIST) { + goto err_undo_rename; + } + + omap_dirfd = openat_dir(odirfd, VIRTFS_META_DIR); + if (omap_dirfd == -1) { + goto err; + } + + nmap_dirfd = openat_dir(ndirfd, VIRTFS_META_DIR); + if (nmap_dirfd == -1) { + close_preserve_errno(omap_dirfd); + goto err; + } - v9fs_string_sprintf(&old_full_name, "%s/%s", olddir->data, old_name); - v9fs_string_sprintf(&new_full_name, "%s/%s", newdir->data, new_name); + /* rename the .virtfs_metadata files */ + ret = renameat(omap_dirfd, old_name, nmap_dirfd, new_name); + close_preserve_errno(nmap_dirfd); + close_preserve_errno(omap_dirfd); + if (ret < 0 && errno != ENOENT) { + goto err_undo_rename; + } - ret = local_rename(ctx, old_full_name.data, new_full_name.data); - v9fs_string_free(&old_full_name); - v9fs_string_free(&new_full_name); + ret = 0; + } + goto out; + +err: + ret = -1; +err_undo_rename: + renameat_preserve_errno(ndirfd, new_name, odirfd, old_name); +out: + close_preserve_errno(ndirfd); + close_preserve_errno(odirfd); return ret; } +static void v9fs_path_init_dirname(V9fsPath *path, const char *str) +{ + path->data = g_path_get_dirname(str); + path->size = strlen(path->data) + 1; +} + +static int local_rename(FsContext *ctx, const char *oldpath, + const char *newpath) +{ + int err; + char *oname = g_path_get_basename(oldpath); + char *nname = g_path_get_basename(newpath); + V9fsPath olddir, newdir; + + v9fs_path_init_dirname(&olddir, oldpath); + v9fs_path_init_dirname(&newdir, newpath); + + err = local_renameat(ctx, &olddir, oname, &newdir, nname); + + v9fs_path_free(&newdir); + 
v9fs_path_free(&olddir); + g_free(nname); + g_free(oname); + + return err; +} + static int local_unlinkat(FsContext *ctx, V9fsPath *dir, const char *name, int flags) { int ret; - V9fsString fullname; - char *buffer; + int dirfd; - v9fs_string_init(&fullname); - - v9fs_string_sprintf(&fullname, "%s/%s", dir->data, name); - if (ctx->export_flags & V9FS_SM_MAPPED_FILE) { - if (flags == AT_REMOVEDIR) { - /* - * If directory remove .virtfs_metadata contained in the - * directory - */ - buffer = g_strdup_printf("%s/%s/%s", ctx->fs_root, - fullname.data, VIRTFS_META_DIR); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } - } - /* - * Now remove the name from parent directory - * .virtfs_metadata directory. - */ - buffer = local_mapped_attr_path(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); - if (ret < 0 && errno != ENOENT) { - /* - * We didn't had the .virtfs_metadata file. May be file created - * in non-mapped mode ?. Ignore ENOENT. - */ - goto err_out; - } + dirfd = local_opendir_nofollow(ctx, dir->data); + if (dirfd == -1) { + return -1; } - /* Remove the name finally */ - buffer = rpath(ctx, fullname.data); - ret = remove(buffer); - g_free(buffer); -err_out: - v9fs_string_free(&fullname); + ret = local_unlinkat_common(ctx, dirfd, name, flags); + close_preserve_errno(dirfd); return ret; } @@ -1168,8 +1237,31 @@ static int local_ioc_getversion(FsContext *ctx, V9fsPath *path, static int local_init(FsContext *ctx) { - int err = 0; struct statfs stbuf; + LocalData *data = g_malloc(sizeof(*data)); + + data->mountfd = open(ctx->fs_root, O_DIRECTORY | O_RDONLY); + if (data->mountfd == -1) { + goto err; + } + +#ifdef FS_IOC_GETVERSION + /* + * use ioc_getversion only if the ioctl is definied + */ + if (fstatfs(data->mountfd, &stbuf) < 0) { + close_preserve_errno(data->mountfd); + goto err; + } + switch (stbuf.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + ctx->exops.get_st_gen = local_ioc_getversion; + break; + } +#endif if (ctx->export_flags & V9FS_SM_PASSTHROUGH) { ctx->xops = passthrough_xattr_ops; @@ -1185,29 +1277,28 @@ static int local_init(FsContext *ctx) ctx->xops = passthrough_xattr_ops; } ctx->export_flags |= V9FS_PATHNAME_FSCONTEXT; -#ifdef FS_IOC_GETVERSION - /* - * use ioc_getversion only if the iocl is definied - */ - err = statfs(ctx->fs_root, &stbuf); - if (!err) { - switch (stbuf.f_type) { - case EXT2_SUPER_MAGIC: - case BTRFS_SUPER_MAGIC: - case REISERFS_SUPER_MAGIC: - case XFS_SUPER_MAGIC: - ctx->exops.get_st_gen = local_ioc_getversion; - break; - } - } -#endif - return err; + + ctx->private = data; + return 0; + +err: + g_free(data); + return -1; +} + +static void local_cleanup(FsContext *ctx) +{ + LocalData *data = ctx->private; + + close(data->mountfd); + g_free(data); } static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) { const char *sec_model = qemu_opt_get(opts, "security_model"); const char *path = qemu_opt_get(opts, "path"); + Error *err = NULL; if (!sec_model) { error_report("Security model not specified, local fs needs security model"); @@ -1236,6 +1327,13 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) error_report("fsdev: No path specified"); return -1; } + + fsdev_throttle_parse_opts(opts, &fse->fst, &err); + if (err) { + error_reportf_err(err, "Throttle configuration is not 
valid: "); + return -1; + } + fse->path = g_strdup(path); return 0; @@ -1244,6 +1342,7 @@ static int local_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations local_ops = { .parse_opts = local_parse_opts, .init = local_init, + .cleanup = local_cleanup, .lstat = local_lstat, .readlink = local_readlink, .close = local_close, diff --git a/hw/9pfs/9p-local.h b/hw/9pfs/9p-local.h new file mode 100644 index 0000000000..32c72749d9 --- /dev/null +++ b/hw/9pfs/9p-local.h @@ -0,0 +1,20 @@ +/* + * 9p local backend utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_LOCAL_H +#define QEMU_9P_LOCAL_H + +int local_open_nofollow(FsContext *fs_ctx, const char *path, int flags, + mode_t mode); +int local_opendir_nofollow(FsContext *fs_ctx, const char *path); + +#endif diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c index ec003181cd..bbf89064f7 100644 --- a/hw/9pfs/9p-posix-acl.c +++ b/hw/9pfs/9p-posix-acl.c @@ -25,13 +25,7 @@ static ssize_t mp_pacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_ACCESS, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size); } static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, @@ -56,23 +50,16 @@ static ssize_t mp_pacl_listxattr(FsContext *ctx, const char *path, static int mp_pacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_ACCESS, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_ACCESS, value, size, + flags); } static int mp_pacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_ACCESS); + ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -82,20 +69,13 @@ static int mp_pacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } static ssize_t mp_dacl_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, MAP_ACL_DEFAULT, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size); } static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, @@ -120,23 +100,16 @@ static ssize_t mp_dacl_listxattr(FsContext *ctx, const char *path, static int mp_dacl_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, MAP_ACL_DEFAULT, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, MAP_ACL_DEFAULT, value, size, + flags); } static int mp_dacl_removexattr(FsContext *ctx, const char *path, const char *name) { int ret; - char *buffer; - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, MAP_ACL_DEFAULT); + ret = local_removexattr_nofollow(ctx, path, 
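Linux has no fgetxattrat()/fsetxattrat() syscalls, so these helpers build a /proc/self/fd/<dirfd>/<name> path and call the l*xattr() variants, which do not dereference a trailing symlink. Only the declaration of the setter appears in this section; assuming it mirrors fgetxattrat_nofollow() above, it would look like this:

    /* Assumed counterpart of fgetxattrat_nofollow(), shown here only to
     * illustrate the /proc/self/fd trick; the real definition is not part
     * of this hunk.  Needs "qemu/xattr.h" for lsetxattr() on all hosts. */
    int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
                             void *value, size_t size, int flags)
    {
        char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename);
        int ret;

        ret = lsetxattr(proc_path, name, value, size, flags);
        g_free(proc_path);
        return ret;
    }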
MAP_ACL_DEFAULT); if (ret == -1 && errno == ENODATA) { /* * We don't get ENODATA error when trying to remove a @@ -146,7 +119,6 @@ static int mp_dacl_removexattr(FsContext *ctx, errno = 0; ret = 0; } - g_free(buffer); return ret; } diff --git a/hw/9pfs/9p-util.c b/hw/9pfs/9p-util.c new file mode 100644 index 0000000000..fdb4d57376 --- /dev/null +++ b/hw/9pfs/9p-util.c @@ -0,0 +1,69 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/xattr.h" +#include "9p-util.h" + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode) +{ + int fd; + + fd = dup(dirfd); + if (fd == -1) { + return -1; + } + + while (*path) { + const char *c; + int next_fd; + char *head; + + /* Only relative paths without consecutive slashes */ + assert(path[0] != '/'); + + head = g_strdup(path); + c = strchr(path, '/'); + if (c) { + head[c - path] = 0; + next_fd = openat_dir(fd, head); + } else { + next_fd = openat_file(fd, head, flags, mode); + } + g_free(head); + if (next_fd == -1) { + close_preserve_errno(fd); + return -1; + } + close(fd); + fd = next_fd; + + if (!c) { + break; + } + path = c + 1; + } + + return fd; +} + +ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lgetxattr(proc_path, name, value, size); + g_free(proc_path); + return ret; +} diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h new file mode 100644 index 0000000000..091f3ce88e --- /dev/null +++ b/hw/9pfs/9p-util.h @@ -0,0 +1,54 @@ +/* + * 9p utilities + * + * Copyright IBM, Corp. 2017 + * + * Authors: + * Greg Kurz <groug@kaod.org> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_9P_UTIL_H +#define QEMU_9P_UTIL_H + +static inline void close_preserve_errno(int fd) +{ + int serrno = errno; + close(fd); + errno = serrno; +} + +static inline int openat_dir(int dirfd, const char *name) +{ + return openat(dirfd, name, O_DIRECTORY | O_RDONLY | O_PATH); +} + +static inline int openat_file(int dirfd, const char *name, int flags, + mode_t mode) +{ + int fd, serrno, ret; + + fd = openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK, + mode); + if (fd == -1) { + return -1; + } + + serrno = errno; + /* O_NONBLOCK was only needed to open the file. Let's drop it. 
*/ + ret = fcntl(fd, F_SETFL, flags); + assert(!ret); + errno = serrno; + return fd; +} + +int relative_openat_nofollow(int dirfd, const char *path, int flags, + mode_t mode); +ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size); +int fsetxattrat_nofollow(int dirfd, const char *path, const char *name, + void *value, size_t size, int flags); + +#endif diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c index f87530c8b5..2c90817b75 100644 --- a/hw/9pfs/9p-xattr-user.c +++ b/hw/9pfs/9p-xattr-user.c @@ -20,9 +20,6 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size) { - char *buffer; - ssize_t ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -31,10 +28,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char *path, errno = ENOATTR; return -1; } - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; + return local_getxattr_nofollow(ctx, path, name, value, size); } static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, @@ -73,9 +67,6 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char *path, static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -84,18 +75,12 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, const char *name, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; + return local_setxattr_nofollow(ctx, path, name, value, size, flags); } static int mp_user_removexattr(FsContext *ctx, const char *path, const char *name) { - char *buffer; - int ret; - if (strncmp(name, "user.virtfs.", 12) == 0) { /* * Don't allow fetch of user.virtfs namesapce @@ -104,10 +89,7 @@ static int mp_user_removexattr(FsContext *ctx, errno = EACCES; return -1; } - buffer = rpath(ctx, path); - ret = lremovexattr(buffer, name); - g_free(buffer); - return ret; + return local_removexattr_nofollow(ctx, path, name); } XattrOperations mapped_user_xattr = { diff --git a/hw/9pfs/9p-xattr.c b/hw/9pfs/9p-xattr.c index 5d8595ed93..eec160b3c2 100644 --- a/hw/9pfs/9p-xattr.c +++ b/hw/9pfs/9p-xattr.c @@ -15,6 +15,8 @@ #include "9p.h" #include "fsdev/file-op-9p.h" #include "9p-xattr.h" +#include "9p-util.h" +#include "9p-local.h" static XattrOperations *get_xattr_operations(XattrOperations **h, @@ -58,6 +60,16 @@ ssize_t pt_listxattr(FsContext *ctx, const char *path, return name_size; } +static ssize_t flistxattrat_nofollow(int dirfd, const char *filename, + char *list, size_t size) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = llistxattr(proc_path, list, size); + g_free(proc_path); + return ret; +} /* * Get the list and pass to each layer to find out whether @@ -67,24 +79,37 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, size_t vsize) { ssize_t size = 0; - char *buffer; void *ovalue = value; XattrOperations *xops; char *orig_value, *orig_value_start; ssize_t xattr_len, parsed_len = 0, attr_len; + char *dirpath, *name; + int dirfd; /* Get the actual len */ - buffer = rpath(ctx, path); - xattr_len = llistxattr(buffer, value, 0); + dirpath = g_path_get_dirname(path); + dirfd = 
local_opendir_nofollow(ctx, dirpath); + g_free(dirpath); + if (dirfd == -1) { + return -1; + } + + name = g_path_get_basename(path); + xattr_len = flistxattrat_nofollow(dirfd, name, value, 0); if (xattr_len <= 0) { - g_free(buffer); + g_free(name); + close_preserve_errno(dirfd); return xattr_len; } /* Now fetch the xattr and find the actual size */ orig_value = g_malloc(xattr_len); - xattr_len = llistxattr(buffer, orig_value, xattr_len); - g_free(buffer); + xattr_len = flistxattrat_nofollow(dirfd, name, orig_value, xattr_len); + g_free(name); + close_preserve_errno(dirfd); + if (xattr_len < 0) { + return -1; + } /* store the orig pointer */ orig_value_start = orig_value; @@ -143,6 +168,135 @@ int v9fs_remove_xattr(FsContext *ctx, } +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fgetxattrat_nofollow(dirfd, filename, name, value, size); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + return local_getxattr_nofollow(ctx, path, name, value, size); +} + +int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name, + void *value, size_t size, int flags) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lsetxattr(proc_path, name, value, size, flags); + g_free(proc_path); + return ret; +} + +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fsetxattrat_nofollow(dirfd, filename, name, value, size, flags); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags) +{ + return local_setxattr_nofollow(ctx, path, name, value, size, flags); +} + +static ssize_t fremovexattrat_nofollow(int dirfd, const char *filename, + const char *name) +{ + char *proc_path = g_strdup_printf("/proc/self/fd/%d/%s", dirfd, filename); + int ret; + + ret = lremovexattr(proc_path, name); + g_free(proc_path); + return ret; +} + +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name) +{ + char *dirpath = g_path_get_dirname(path); + char *filename = g_path_get_basename(path); + int dirfd; + ssize_t ret = -1; + + dirfd = local_opendir_nofollow(ctx, dirpath); + if (dirfd == -1) { + goto out; + } + + ret = fremovexattrat_nofollow(dirfd, filename, name); + close_preserve_errno(dirfd); +out: + g_free(dirpath); + g_free(filename); + return ret; +} + +int pt_removexattr(FsContext *ctx, const char *path, const char *name) +{ + return local_removexattr_nofollow(ctx, path, name); +} + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size) +{ + errno = ENOTSUP; + return -1; +} + +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags) +{ + errno = ENOTSUP; + return -1; +} 
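/*
 * Illustrative sketch, not part of the patch above: every local_*_nofollow
 * helper follows the same shape -- split the export-relative path into
 * dirname/basename, open the directory with local_opendir_nofollow() so no
 * symlink in the intermediate components is ever followed, then act on the
 * final component relative to that directory fd. The function below applies
 * the pattern to a stat(2)-style lookup; its name is hypothetical, and only
 * fstatat(), AT_SYMLINK_NOFOLLOW and the helpers defined in this series are
 * assumed.
 */
static int local_stat_nofollow_sketch(FsContext *ctx, const char *path,
                                      struct stat *stbuf)
{
    char *dirpath = g_path_get_dirname(path);
    char *filename = g_path_get_basename(path);
    int ret = -1;
    int dirfd;

    dirfd = local_opendir_nofollow(ctx, dirpath);
    if (dirfd == -1) {
        goto out;
    }

    /* AT_SYMLINK_NOFOLLOW keeps the final component from being followed */
    ret = fstatat(dirfd, filename, stbuf, AT_SYMLINK_NOFOLLOW);
    close_preserve_errno(dirfd);
out:
    g_free(dirpath);
    g_free(filename);
    return ret;
}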
+ +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size) +{ + return 0; +} + +int notsup_removexattr(FsContext *ctx, const char *path, const char *name) +{ + errno = ENOTSUP; + return -1; +} + XattrOperations *mapped_xattr_ops[] = { &mapped_user_xattr, &mapped_pacl_xattr, diff --git a/hw/9pfs/9p-xattr.h b/hw/9pfs/9p-xattr.h index a853ea641c..0d83996575 100644 --- a/hw/9pfs/9p-xattr.h +++ b/hw/9pfs/9p-xattr.h @@ -29,6 +29,13 @@ typedef struct xattr_operations const char *path, const char *name); } XattrOperations; +ssize_t local_getxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size); +ssize_t local_setxattr_nofollow(FsContext *ctx, const char *path, + const char *name, void *value, size_t size, + int flags); +ssize_t local_removexattr_nofollow(FsContext *ctx, const char *path, + const char *name); extern XattrOperations mapped_user_xattr; extern XattrOperations passthrough_user_xattr; @@ -49,73 +56,21 @@ ssize_t v9fs_list_xattr(FsContext *ctx, const char *path, void *value, int v9fs_set_xattr(FsContext *ctx, const char *path, const char *name, void *value, size_t size, int flags); int v9fs_remove_xattr(FsContext *ctx, const char *path, const char *name); + ssize_t pt_listxattr(FsContext *ctx, const char *path, char *name, void *value, size_t size); - -static inline ssize_t pt_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, size_t size) -{ - char *buffer; - ssize_t ret; - - buffer = rpath(ctx, path); - ret = lgetxattr(buffer, name, value, size); - g_free(buffer); - return ret; -} - -static inline int pt_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lsetxattr(buffer, name, value, size, flags); - g_free(buffer); - return ret; -} - -static inline int pt_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - char *buffer; - int ret; - - buffer = rpath(ctx, path); - ret = lremovexattr(path, name); - g_free(buffer); - return ret; -} - -static inline ssize_t notsup_getxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size) -{ - errno = ENOTSUP; - return -1; -} - -static inline int notsup_setxattr(FsContext *ctx, const char *path, - const char *name, void *value, - size_t size, int flags) -{ - errno = ENOTSUP; - return -1; -} - -static inline ssize_t notsup_listxattr(FsContext *ctx, const char *path, - char *name, void *value, size_t size) -{ - return 0; -} - -static inline int notsup_removexattr(FsContext *ctx, - const char *path, const char *name) -{ - errno = ENOTSUP; - return -1; -} +ssize_t pt_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int pt_setxattr(FsContext *ctx, const char *path, const char *name, void *value, + size_t size, int flags); +int pt_removexattr(FsContext *ctx, const char *path, const char *name); + +ssize_t notsup_getxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size); +int notsup_setxattr(FsContext *ctx, const char *path, const char *name, + void *value, size_t size, int flags); +ssize_t notsup_listxattr(FsContext *ctx, const char *path, char *name, + void *value, size_t size); +int notsup_removexattr(FsContext *ctx, const char *path, const char *name); #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 3af1c93dc8..76c9247c77 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -3010,7 +3010,6 @@ out_nofid: */ 
static void coroutine_fn v9fs_lock(void *opaque) { - int8_t status; V9fsFlock flock; size_t offset = 7; struct stat stbuf; @@ -3018,7 +3017,6 @@ static void coroutine_fn v9fs_lock(void *opaque) int32_t fid, err = 0; V9fsPDU *pdu = opaque; - status = P9_LOCK_ERROR; v9fs_string_init(&flock.client_id); err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type, &flock.flags, &flock.start, &flock.length, @@ -3044,15 +3042,15 @@ static void coroutine_fn v9fs_lock(void *opaque) if (err < 0) { goto out; } - status = P9_LOCK_SUCCESS; + err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS); + if (err < 0) { + goto out; + } + err += offset; + trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS); out: put_fid(pdu, fidp); out_nofid: - err = pdu_marshal(pdu, offset, "b", status); - if (err > 0) { - err += offset; - } - trace_v9fs_lock_return(pdu->tag, pdu->id, status); pdu_complete(pdu, err); v9fs_string_free(&flock.client_id); } @@ -3531,6 +3529,10 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) error_setg(errp, "share path %s is not a directory", fse->path); goto out; } + + s->ctx.fst = &fse->fst; + fsdev_throttle_init(s->ctx.fst); + v9fs_path_free(&path); rc = 0; @@ -3551,6 +3553,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) if (s->ops->cleanup) { s->ops->cleanup(&s->ctx); } + fsdev_throttle_cleanup(s->ctx.fst); g_free(s->tag); g_free(s->ctx.fs_root); } diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs index da0ae0cfdb..32197e6671 100644 --- a/hw/9pfs/Makefile.objs +++ b/hw/9pfs/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y = 9p.o +common-obj-y = 9p.o 9p-util.o common-obj-y += 9p-local.o 9p-xattr.o common-obj-y += 9p-xattr-user.o 9p-posix-acl.o common-obj-y += coth.o cofs.o codir.o cofile.o diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 120e267108..88791bc327 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -247,6 +247,7 @@ int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, true, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->pwritev(&s->ctx, &fidp->fs, iov, iovcnt, offset); @@ -266,6 +267,7 @@ int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp, if (v9fs_request_cancelled(pdu)) { return -EINTR; } + fsdev_co_throttle_request(s->ctx.fst, false, iov, iovcnt); v9fs_co_run_in_worker( { err = s->ops->preadv(&s->ctx, &fidp->fs, iov, iovcnt, offset); diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 6acf79860a..11c35bcb44 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -5,6 +5,7 @@ common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o +common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o common-obj-y += acpi_interface.o diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index b2a1e4033b..c6f2032dec 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1559,6 +1559,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables) tables->rsdp = g_array_new(false, true /* clear */, 1); tables->table_data = g_array_new(false, true /* clear */, 1); tables->tcpalog = g_array_new(false, true /* clear */, 1); + tables->vmgenid = g_array_new(false, true /* clear */, 1); tables->linker = bios_linker_loader_init(); } @@ -1568,6 +1569,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables 
*tables, bool mfre) g_array_free(tables->rsdp, true); g_array_free(tables->table_data, true); g_array_free(tables->tcpalog, mfre); + g_array_free(tables->vmgenid, mfre); } /* Build rsdt table */ diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c index d963ebe241..046183a0f1 100644 --- a/hw/acpi/bios-linker-loader.c +++ b/hw/acpi/bios-linker-loader.c @@ -78,6 +78,21 @@ struct BiosLinkerLoaderEntry { uint32_t length; } cksum; + /* + * COMMAND_WRITE_POINTER - write the fw_cfg file (originating from + * @dest_file) at @wr_pointer.offset, by adding a pointer to + * @src_offset within the table originating from @src_file. + * 1, 2, 4 or 8 byte unsigned addition is used depending on + * @wr_pointer.size. + */ + struct { + char dest_file[BIOS_LINKER_LOADER_FILESZ]; + char src_file[BIOS_LINKER_LOADER_FILESZ]; + uint32_t dst_offset; + uint32_t src_offset; + uint8_t size; + } wr_pointer; + /* padding */ char pad[124]; }; @@ -85,9 +100,10 @@ struct BiosLinkerLoaderEntry { typedef struct BiosLinkerLoaderEntry BiosLinkerLoaderEntry; enum { - BIOS_LINKER_LOADER_COMMAND_ALLOCATE = 0x1, - BIOS_LINKER_LOADER_COMMAND_ADD_POINTER = 0x2, - BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3, + BIOS_LINKER_LOADER_COMMAND_ALLOCATE = 0x1, + BIOS_LINKER_LOADER_COMMAND_ADD_POINTER = 0x2, + BIOS_LINKER_LOADER_COMMAND_ADD_CHECKSUM = 0x3, + BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER = 0x4, }; enum { @@ -278,3 +294,47 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker, g_array_append_vals(linker->cmd_blob, &entry, sizeof entry); } + +/* + * bios_linker_loader_write_pointer: ask guest to write a pointer to the + * source file into the destination file, and write it back to QEMU via + * fw_cfg DMA. + * + * @linker: linker object instance + * @dest_file: destination file that must be written + * @dst_patched_offset: location within destination file blob to be patched + * with the pointer to @src_file, in bytes + * @dst_patched_size: size of the pointer to be patched + * at @dst_patched_offset in @dest_file blob, in bytes + * @src_file: source file whose address must be taken + * @src_offset: location within source file blob to which + * @dest_file+@dst_patched_offset will point to after + * the firmware has executed the WRITE_POINTER command + */ +void bios_linker_loader_write_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset) +{ + BiosLinkerLoaderEntry entry; + const BiosLinkerFileEntry *source_file = + bios_linker_find_file(linker, src_file); + + assert(source_file); + assert(src_offset < source_file->blob->len); + memset(&entry, 0, sizeof entry); + strncpy(entry.wr_pointer.dest_file, dest_file, + sizeof entry.wr_pointer.dest_file - 1); + strncpy(entry.wr_pointer.src_file, src_file, + sizeof entry.wr_pointer.src_file - 1); + entry.command = cpu_to_le32(BIOS_LINKER_LOADER_COMMAND_WRITE_POINTER); + entry.wr_pointer.dst_offset = cpu_to_le32(dst_patched_offset); + entry.wr_pointer.src_offset = cpu_to_le32(src_offset); + entry.wr_pointer.size = dst_patched_size; + assert(dst_patched_size == 1 || dst_patched_size == 2 || + dst_patched_size == 4 || dst_patched_size == 8); + + g_array_append_vals(linker->cmd_blob, &entry, sizeof entry); +} diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 6017ca04bf..8c719d3f9d 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -198,7 +198,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, state->dev_count = id_list->len; state->devs =
g_new0(typeof(*state->devs), state->dev_count); for (i = 0; i < id_list->len; i++) { - state->devs[i].cpu = id_list->cpus[i].cpu; + state->devs[i].cpu = CPU(id_list->cpus[i].cpu); state->devs[i].arch_id = id_list->cpus[i].arch_id; } memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index d957d1e30d..2b0f3e1bfb 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -49,7 +49,6 @@ #define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0014 -#define ACPI_PCIHP_LEGACY_SIZE 0x000f #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 #define PCI_EJ_BASE 0x0008 @@ -302,16 +301,6 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, s->root= root_bus; s->legacy_piix = !bridges_enabled; - if (s->legacy_piix) { - unsigned *bus_bsel = g_malloc(sizeof *bus_bsel); - - s->io_len = ACPI_PCIHP_LEGACY_SIZE; - - *bus_bsel = ACPI_PCIHP_BSEL_DEFAULT; - object_property_add_uint32_ptr(OBJECT(root_bus), ACPI_PCIHP_PROP_BSEL, - bus_bsel, NULL); - } - memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s, "acpi-pci-hotplug", s->io_len); memory_region_add_subregion(address_space_io, s->io_base, &s->io); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 6d99fe407c..a553a7e110 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -440,6 +440,8 @@ static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) { PIIX4PMState *s = opaque; + /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive + * and it's not hot-unpluggable */ qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort); } diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c index 8ce7daf23a..b4adac88cd 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/tco.c @@ -49,6 +49,7 @@ static inline void tco_timer_reload(TCOIORegs *tr) static inline void tco_timer_stop(TCOIORegs *tr) { tr->expire_time = -1; + timer_del(tr->tco_timer); } static void tco_timer_expired(void *opaque) diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c new file mode 100644 index 0000000000..744f2847da --- /dev/null +++ b/hw/acpi/vmgenid.c @@ -0,0 +1,258 @@ +/* + * Virtual Machine Generation ID Device + * + * Copyright (C) 2017 Skyport Systems. + * + * Author: Ben Warren <ben@skyportsystems.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qmp-commands.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/vmgenid.h" +#include "hw/nvram/fw_cfg.h" +#include "sysemu/sysemu.h" + +void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, + BIOSLinker *linker) +{ + Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx; + uint32_t vgia_offset; + QemuUUID guid_le; + + /* Fill in the GUID values. 
These need to be converted to little-endian + * first, since that's what the guest expects + */ + g_array_set_size(guid, VMGENID_FW_CFG_SIZE - ARRAY_SIZE(guid_le.data)); + guid_le = vms->guid; + qemu_uuid_bswap(&guid_le); + /* The GUID is written at a fixed offset into the fw_cfg file + * in order to implement the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details + */ + g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data, + ARRAY_SIZE(guid_le.data)); + + /* Put this in a separate SSDT table */ + ssdt = init_aml_allocator(); + + /* Reserve space for header */ + acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); + + /* Storage for the GUID address */ + vgia_offset = table_data->len + + build_append_named_dword(ssdt->buf, "VGIA"); + scope = aml_scope("\\_SB"); + dev = aml_device("VGEN"); + aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVGID"))); + aml_append(dev, aml_name_decl("_CID", aml_string("VM_Gen_Counter"))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VM_Gen_Counter"))); + + /* Simple status method to check that address is linked and non-zero */ + method = aml_method("_STA", 0, AML_NOTSERIALIZED); + addr = aml_local(0); + aml_append(method, aml_store(aml_int(0xf), addr)); + if_ctx = aml_if(aml_equal(aml_name("VGIA"), aml_int(0))); + aml_append(if_ctx, aml_store(aml_int(0), addr)); + aml_append(method, if_ctx); + aml_append(method, aml_return(addr)); + aml_append(dev, method); + + /* the ADDR method returns two 32-bit words representing the lower and + * upper halves of the physical address of the fw_cfg blob + * (holding the GUID) + */ + method = aml_method("ADDR", 0, AML_NOTSERIALIZED); + + addr = aml_local(0); + aml_append(method, aml_store(aml_package(2), addr)); + + aml_append(method, aml_store(aml_add(aml_name("VGIA"), + aml_int(VMGENID_GUID_OFFSET), NULL), + aml_index(addr, aml_int(0)))); + aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1)))); + aml_append(method, aml_return(addr)); + + aml_append(dev, method); + aml_append(scope, dev); + aml_append(ssdt, scope); + + /* attach an ACPI notify */ + method = aml_method("\\_GPE._E05", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.VGEN"), aml_int(0x80))); + aml_append(ssdt, method); + + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + + /* Allocate guest memory for the Data fw_cfg blob */ + bios_linker_loader_alloc(linker, VMGENID_GUID_FW_CFG_FILE, guid, 4096, + false /* page boundary, high memory */); + + /* Patch address of GUID fw_cfg blob into the ADDR fw_cfg blob + * so QEMU can write the GUID there. The address is expected to be + * < 4GB, but write 64 bits anyway. + * The address that is patched in is offset in order to implement + * the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details. + */ + bios_linker_loader_write_pointer(linker, + VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t), + VMGENID_GUID_FW_CFG_FILE, VMGENID_GUID_OFFSET); + + /* Patch address of GUID fw_cfg blob into the AML so OSPM can retrieve + * and read it. Note that while we provide storage for 64 bits, only + * the least-significant 32 bits get patched into AML. 
+ */ + bios_linker_loader_add_pointer(linker, + ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t), + VMGENID_GUID_FW_CFG_FILE, 0); + + build_header(linker, table_data, + (void *)(table_data->data + table_data->len - ssdt->buf->len), + "SSDT", ssdt->buf->len, 1, NULL, "VMGENID"); + free_aml_allocator(); +} + +void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid) +{ + /* Create a read-only fw_cfg file for GUID */ + fw_cfg_add_file(s, VMGENID_GUID_FW_CFG_FILE, guid->data, + VMGENID_FW_CFG_SIZE); + /* Create a read-write fw_cfg file for Address */ + fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL, + vms->vmgenid_addr_le, + ARRAY_SIZE(vms->vmgenid_addr_le), false); +} + +static void vmgenid_update_guest(VmGenIdState *vms) +{ + Object *obj = object_resolve_path_type("", TYPE_ACPI_DEVICE_IF, NULL); + uint32_t vmgenid_addr; + QemuUUID guid_le; + + if (obj) { + /* Write the GUID to guest memory */ + memcpy(&vmgenid_addr, vms->vmgenid_addr_le, sizeof(vmgenid_addr)); + vmgenid_addr = le32_to_cpu(vmgenid_addr); + /* A zero value in vmgenid_addr means that BIOS has not yet written + * the address + */ + if (vmgenid_addr) { + /* QemuUUID has the first three words as big-endian, and expect + * that any GUIDs passed in will always be BE. The guest, + * however, will expect the fields to be little-endian. + * Perform a byte swap immediately before writing. + */ + guid_le = vms->guid; + qemu_uuid_bswap(&guid_le); + /* The GUID is written at a fixed offset into the fw_cfg file + * in order to implement the "OVMF SDT Header probe suppressor" + * see docs/specs/vmgenid.txt for more details. + */ + cpu_physical_memory_write(vmgenid_addr, guid_le.data, + sizeof(guid_le.data)); + /* Send _GPE.E05 event */ + acpi_send_event(DEVICE(obj), ACPI_VMGENID_CHANGE_STATUS); + } + } +} + +static void vmgenid_set_guid(Object *obj, const char *value, Error **errp) +{ + VmGenIdState *vms = VMGENID(obj); + + if (!strcmp(value, "auto")) { + qemu_uuid_generate(&vms->guid); + } else if (qemu_uuid_parse(value, &vms->guid) < 0) { + error_setg(errp, "'%s. 
%s': Failed to parse GUID string: %s", + object_get_typename(OBJECT(vms)), VMGENID_GUID, value); + return; + } + + vmgenid_update_guest(vms); +} + +/* After restoring an image, we need to update the guest memory and notify + * it of a potential change to VM Generation ID + */ +static int vmgenid_post_load(void *opaque, int version_id) +{ + VmGenIdState *vms = opaque; + vmgenid_update_guest(vms); + return 0; +} + +static const VMStateDescription vmstate_vmgenid = { + .name = "vmgenid", + .version_id = 1, + .minimum_version_id = 1, + .post_load = vmgenid_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(vmgenid_addr_le, VmGenIdState, sizeof(uint64_t)), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmgenid_handle_reset(void *opaque) +{ + VmGenIdState *vms = VMGENID(opaque); + /* Clear the guest-allocated GUID address when the VM resets */ + memset(vms->vmgenid_addr_le, 0, ARRAY_SIZE(vms->vmgenid_addr_le)); +} + +static void vmgenid_realize(DeviceState *dev, Error **errp) +{ + VmGenIdState *vms = VMGENID(dev); + qemu_register_reset(vmgenid_handle_reset, vms); +} + +static void vmgenid_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vmgenid; + dc->realize = vmgenid_realize; + dc->hotpluggable = false; + + object_class_property_add_str(klass, VMGENID_GUID, NULL, + vmgenid_set_guid, NULL); + object_class_property_set_description(klass, VMGENID_GUID, + "Set Global Unique Identifier " + "(big-endian) or auto for random value", + NULL); +} + +static const TypeInfo vmgenid_device_info = { + .name = VMGENID_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VmGenIdState), + .class_init = vmgenid_device_class_init, +}; + +static void vmgenid_register_types(void) +{ + type_register_static(&vmgenid_device_info); +} + +type_init(vmgenid_register_types) + +GuidInfo *qmp_query_vm_generation_id(Error **errp) +{ + GuidInfo *info; + VmGenIdState *vms; + Object *obj = find_vmgenid_dev(); + + if (!obj) { + return NULL; + } + vms = VMGENID(obj); + + info = g_malloc0(sizeof(*info)); + info->guid = qemu_uuid_unparse_strdup(&vms->guid); + return info; +} diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c index 0c9ca7bfa0..c8a11f2b53 100644 --- a/hw/arm/armv7m.c +++ b/hw/arm/armv7m.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "hw/arm/armv7m.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -17,147 +18,260 @@ #include "elf.h" #include "sysemu/qtest.h" #include "qemu/error-report.h" +#include "exec/address-spaces.h" /* Bitbanded IO. Each word corresponds to a single bit. */ /* Get the byte address of the real memory for a bitband access. 
*/ -static inline uint32_t bitband_addr(void * opaque, uint32_t addr) +static inline hwaddr bitband_addr(BitBandState *s, hwaddr offset) { - uint32_t res; - - res = *(uint32_t *)opaque; - res |= (addr & 0x1ffffff) >> 5; - return res; - + return s->base | (offset & 0x1ffffff) >> 5; } -static uint32_t bitband_readb(void *opaque, hwaddr offset) +static MemTxResult bitband_read(void *opaque, hwaddr offset, + uint64_t *data, unsigned size, MemTxAttrs attrs) { - uint8_t v; - cpu_physical_memory_read(bitband_addr(opaque, offset), &v, 1); - return (v & (1 << ((offset >> 2) & 7))) != 0; + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... */ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = (buf[bitpos >> 3] >> (bitpos & 7)) & 1; + *data = bit; + return MEMTX_OK; } -static void bitband_writeb(void *opaque, hwaddr offset, - uint32_t value) +static MemTxResult bitband_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) { - uint32_t addr; - uint8_t mask; - uint8_t v; - addr = bitband_addr(opaque, offset); - mask = (1 << ((offset >> 2) & 7)); - cpu_physical_memory_read(addr, &v, 1); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 1); + BitBandState *s = opaque; + uint8_t buf[4]; + MemTxResult res; + int bitpos, bit; + hwaddr addr; + + assert(size <= 4); + + /* Find address in underlying memory and round down to multiple of size */ + addr = bitband_addr(s, offset) & (-size); + res = address_space_read(s->source_as, addr, attrs, buf, size); + if (res) { + return res; + } + /* Bit position in the N bytes read... 
*/ + bitpos = (offset >> 2) & ((size * 8) - 1); + /* ...converted to byte in buffer and bit in byte */ + bit = 1 << (bitpos & 7); + if (value & 1) { + buf[bitpos >> 3] |= bit; + } else { + buf[bitpos >> 3] &= ~bit; + } + return address_space_write(s->source_as, addr, attrs, buf, size); } -static uint32_t bitband_readw(void *opaque, hwaddr offset) +static const MemoryRegionOps bitband_ops = { + .read_with_attrs = bitband_read, + .write_with_attrs = bitband_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl.min_access_size = 1, + .impl.max_access_size = 4, + .valid.min_access_size = 1, + .valid.max_access_size = 4, +}; + +static void bitband_init(Object *obj) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - return (v & mask) != 0; + BitBandState *s = BITBAND(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + object_property_add_link(obj, "source-memory", + TYPE_MEMORY_REGION, + (Object **)&s->source_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init_io(&s->iomem, obj, &bitband_ops, s, + "bitband", 0x02000000); + sysbus_init_mmio(dev, &s->iomem); } -static void bitband_writew(void *opaque, hwaddr offset, - uint32_t value) +static void bitband_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint16_t mask; - uint16_t v; - addr = bitband_addr(opaque, offset) & ~1; - mask = (1 << ((offset >> 2) & 15)); - mask = tswap16(mask); - cpu_physical_memory_read(addr, &v, 2); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 2); + BitBandState *s = BITBAND(dev); + + if (!s->source_memory) { + error_setg(errp, "source-memory property not set"); + return; + } + + s->source_as = address_space_init_shareable(s->source_memory, + "bitband-source"); } -static uint32_t bitband_readl(void *opaque, hwaddr offset) +/* Board init. 
*/ + +static const hwaddr bitband_input_addr[ARMV7M_NUM_BITBANDS] = { + 0x20000000, 0x40000000 +}; + +static const hwaddr bitband_output_addr[ARMV7M_NUM_BITBANDS] = { + 0x22000000, 0x42000000 +}; + +static void armv7m_instance_init(Object *obj) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - return (v & mask) != 0; + ARMv7MState *s = ARMV7M(obj); + int i; + + /* Can't init the cpu here, we don't yet know which model to use */ + + object_property_add_link(obj, "memory", + TYPE_MEMORY_REGION, + (Object **)&s->board_memory, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); + memory_region_init(&s->container, obj, "armv7m-container", UINT64_MAX); + + object_initialize(&s->nvic, sizeof(s->nvic), "armv7m_nvic"); + qdev_set_parent_bus(DEVICE(&s->nvic), sysbus_get_default()); + object_property_add_alias(obj, "num-irq", + OBJECT(&s->nvic), "num-irq", &error_abort); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + object_initialize(&s->bitband[i], sizeof(s->bitband[i]), TYPE_BITBAND); + qdev_set_parent_bus(DEVICE(&s->bitband[i]), sysbus_get_default()); + } } -static void bitband_writel(void *opaque, hwaddr offset, - uint32_t value) +static void armv7m_realize(DeviceState *dev, Error **errp) { - uint32_t addr; - uint32_t mask; - uint32_t v; - addr = bitband_addr(opaque, offset) & ~3; - mask = (1 << ((offset >> 2) & 31)); - mask = tswap32(mask); - cpu_physical_memory_read(addr, &v, 4); - if (value & 1) - v |= mask; - else - v &= ~mask; - cpu_physical_memory_write(addr, &v, 4); -} + ARMv7MState *s = ARMV7M(dev); + SysBusDevice *sbd; + Error *err = NULL; + int i; + char **cpustr; + ObjectClass *oc; + const char *typename; + CPUClass *cc; + + if (!s->board_memory) { + error_setg(errp, "memory property was not set"); + return; + } -static const MemoryRegionOps bitband_ops = { - .old_mmio = { - .read = { bitband_readb, bitband_readw, bitband_readl, }, - .write = { bitband_writeb, bitband_writew, bitband_writel, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; + memory_region_add_subregion_overlap(&s->container, 0, s->board_memory, -1); -#define TYPE_BITBAND "ARM,bitband-memory" -#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + cpustr = g_strsplit(s->cpu_model, ",", 2); -typedef struct { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ + oc = cpu_class_by_name(TYPE_ARM_CPU, cpustr[0]); + if (!oc) { + error_setg(errp, "Unknown CPU model %s", cpustr[0]); + g_strfreev(cpustr); + return; + } - MemoryRegion iomem; - uint32_t base; -} BitBandState; + cc = CPU_CLASS(oc); + typename = object_class_get_name(oc); + cc->parse_features(typename, cpustr[1], &err); + g_strfreev(cpustr); + if (err) { + error_propagate(errp, err); + return; + } -static void bitband_init(Object *obj) -{ - BitBandState *s = BITBAND(obj); - SysBusDevice *dev = SYS_BUS_DEVICE(obj); + s->cpu = ARM_CPU(object_new(typename)); + if (!s->cpu) { + error_setg(errp, "Unknown CPU model %s", s->cpu_model); + return; + } - memory_region_init_io(&s->iomem, obj, &bitband_ops, &s->base, - "bitband", 0x02000000); - sysbus_init_mmio(dev, &s->iomem); + object_property_set_link(OBJECT(s->cpu), OBJECT(&s->container), "memory", + &error_abort); + object_property_set_bool(OBJECT(s->cpu), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Note that we must realize the NVIC after the CPU */ 
+ object_property_set_bool(OBJECT(&s->nvic), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + /* Alias the NVIC's input and output GPIOs as our own so the board + * code can wire them up. (We do this in realize because the + * NVIC doesn't create the input GPIO array until realize.) + */ + qdev_pass_gpios(DEVICE(&s->nvic), dev, NULL); + qdev_pass_gpios(DEVICE(&s->nvic), dev, "SYSRESETREQ"); + + /* Wire the NVIC up to the CPU */ + sbd = SYS_BUS_DEVICE(&s->nvic); + sysbus_connect_irq(sbd, 0, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); + s->cpu->env.nvic = &s->nvic; + + memory_region_add_subregion(&s->container, 0xe000e000, + sysbus_mmio_get_region(sbd, 0)); + + for (i = 0; i < ARRAY_SIZE(s->bitband); i++) { + Object *obj = OBJECT(&s->bitband[i]); + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->bitband[i]); + + object_property_set_int(obj, bitband_input_addr[i], "base", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + object_property_set_link(obj, OBJECT(s->board_memory), + "source-memory", &error_abort); + object_property_set_bool(obj, true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->container, bitband_output_addr[i], + sysbus_mmio_get_region(sbd, 0)); + } } -static void armv7m_bitband_init(void) -{ - DeviceState *dev; +static Property armv7m_properties[] = { + DEFINE_PROP_STRING("cpu-model", ARMv7MState, cpu_model), + DEFINE_PROP_END_OF_LIST(), +}; - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x20000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x22000000); +static void armv7m_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); - dev = qdev_create(NULL, TYPE_BITBAND); - qdev_prop_set_uint32(dev, "base", 0x40000000); - qdev_init_nofail(dev); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x42000000); + dc->realize = armv7m_realize; + dc->props = armv7m_properties; } -/* Board init. */ +static const TypeInfo armv7m_info = { + .name = TYPE_ARMV7M, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(ARMv7MState), + .instance_init = armv7m_instance_init, + .class_init = armv7m_class_init, +}; static void armv7m_reset(void *opaque) { @@ -168,37 +282,35 @@ static void armv7m_reset(void *opaque) /* Init CPU and memory for a v7-M based board. mem_size is in bytes. - Returns the NVIC array. */ + Returns the ARMv7M device. 
*/ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model) { - ARMCPU *cpu; - CPUARMState *env; - DeviceState *nvic; - int image_size; - uint64_t entry; - uint64_t lowaddr; - int big_endian; + DeviceState *armv7m; if (cpu_model == NULL) { - cpu_model = "cortex-m3"; - } - cpu = cpu_arm_init(cpu_model); - if (cpu == NULL) { - fprintf(stderr, "Unable to find CPU definition\n"); - exit(1); + cpu_model = "cortex-m3"; } - env = &cpu->env; - armv7m_bitband_init(); + armv7m = qdev_create(NULL, "armv7m"); + qdev_prop_set_uint32(armv7m, "num-irq", num_irq); + qdev_prop_set_string(armv7m, "cpu-model", cpu_model); + object_property_set_link(OBJECT(armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + /* This will exit with an error if the user passed us a bad cpu_model */ + qdev_init_nofail(armv7m); + + armv7m_load_kernel(ARM_CPU(first_cpu), kernel_filename, mem_size); + return armv7m; +} - nvic = qdev_create(NULL, "armv7m_nvic"); - qdev_prop_set_uint32(nvic, "num-irq", num_irq); - env->nvic = nvic; - qdev_init_nofail(nvic); - sysbus_connect_irq(SYS_BUS_DEVICE(nvic), 0, - qdev_get_gpio_in(DEVICE(cpu), ARM_CPU_IRQ)); +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size) +{ + int image_size; + uint64_t entry; + uint64_t lowaddr; + int big_endian; #ifdef TARGET_WORDS_BIGENDIAN big_endian = 1; @@ -224,8 +336,15 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, } } + /* CPU objects (unlike devices) are not automatically reset on system + * reset, so we must always register a handler to do so. Unlike + * A-profile CPUs, we don't need to do anything special in the + * handler to arrange that it starts correctly. + * This is arguably the wrong place to do this, but it matches the + * way A-profile does it. Note that this means that every M profile + * board must call this function! 
+ */ qemu_register_reset(armv7m_reset, cpu); - return nvic; } static Property bitband_properties[] = { @@ -237,6 +356,7 @@ static void bitband_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = bitband_realize; dc->props = bitband_properties; } @@ -251,6 +371,7 @@ static const TypeInfo bitband_info = { static void armv7m_register_types(void) { type_register_static(&bitband_info); + type_register_static(&armv7m_info); } type_init(armv7m_register_types) diff --git a/hw/arm/bcm2835_peripherals.c b/hw/arm/bcm2835_peripherals.c index 2e641a3989..369ef1e3bd 100644 --- a/hw/arm/bcm2835_peripherals.c +++ b/hw/arm/bcm2835_peripherals.c @@ -86,11 +86,21 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->property), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + /* Random Number Generator */ + object_initialize(&s->rng, sizeof(s->rng), TYPE_BCM2835_RNG); + object_property_add_child(obj, "rng", OBJECT(&s->rng), NULL); + qdev_set_parent_bus(DEVICE(&s->rng), sysbus_get_default()); + /* Extended Mass Media Controller */ object_initialize(&s->sdhci, sizeof(s->sdhci), TYPE_SYSBUS_SDHCI); object_property_add_child(obj, "sdhci", OBJECT(&s->sdhci), NULL); qdev_set_parent_bus(DEVICE(&s->sdhci), sysbus_get_default()); + /* SDHOST */ + object_initialize(&s->sdhost, sizeof(s->sdhost), TYPE_BCM2835_SDHOST); + object_property_add_child(obj, "sdhost", OBJECT(&s->sdhost), NULL); + qdev_set_parent_bus(DEVICE(&s->sdhost), sysbus_get_default()); + /* DMA Channels */ object_initialize(&s->dma, sizeof(s->dma), TYPE_BCM2835_DMA); object_property_add_child(obj, "dma", OBJECT(&s->dma), NULL); @@ -98,6 +108,16 @@ static void bcm2835_peripherals_init(Object *obj) object_property_add_const_link(OBJECT(&s->dma), "dma-mr", OBJECT(&s->gpu_bus_mr), &error_abort); + + /* GPIO */ + object_initialize(&s->gpio, sizeof(s->gpio), TYPE_BCM2835_GPIO); + object_property_add_child(obj, "gpio", OBJECT(&s->gpio), NULL); + qdev_set_parent_bus(DEVICE(&s->gpio), sysbus_get_default()); + + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhci", + OBJECT(&s->sdhci.sdbus), &error_abort); + object_property_add_const_link(OBJECT(&s->gpio), "sdbus-sdhost", + OBJECT(&s->sdhost.sdbus), &error_abort); } static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) @@ -226,6 +246,16 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->property), 0, qdev_get_gpio_in(DEVICE(&s->mboxes), MBOX_CHAN_PROPERTY)); + /* Random Number Generator */ + object_property_set_bool(OBJECT(&s->rng), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, RNG_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->rng), 0)); + /* Extended Mass Media Controller */ object_property_set_int(OBJECT(&s->sdhci), BCM2835_SDHC_CAPAREG, "capareg", &err); @@ -252,13 +282,20 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0, qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, INTERRUPT_ARASANSDIO)); - object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->sdhci), "sd-bus", - &err); + + /* SDHOST */ + object_property_set_bool(OBJECT(&s->sdhost), true, "realized", &err); if (err) { error_propagate(errp, err); return; } + memory_region_add_subregion(&s->peri_mr, MMCI0_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->sdhost), 0)); + 
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhost), 0, + qdev_get_gpio_in_named(DEVICE(&s->ic), BCM2835_IC_GPU_IRQ, + INTERRUPT_SDIO)); + /* DMA Channels */ object_property_set_bool(OBJECT(&s->dma), true, "realized", &err); if (err) { @@ -277,6 +314,23 @@ static void bcm2835_peripherals_realize(DeviceState *dev, Error **errp) BCM2835_IC_GPU_IRQ, INTERRUPT_DMA0 + n)); } + + /* GPIO */ + object_property_set_bool(OBJECT(&s->gpio), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + memory_region_add_subregion(&s->peri_mr, GPIO_OFFSET, + sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->gpio), 0)); + + object_property_add_alias(OBJECT(s), "sd-bus", OBJECT(&s->gpio), "sd-bus", + &err); + if (err) { + error_propagate(errp, err); + return; + } } static void bcm2835_peripherals_class_init(ObjectClass *oc, void *data) diff --git a/hw/arm/exynos4210.c b/hw/arm/exynos4210.c index be3c96d21e..1d2b50cc4e 100644 --- a/hw/arm/exynos4210.c +++ b/hw/arm/exynos4210.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/log.h" #include "cpu.h" #include "hw/boards.h" #include "sysemu/sysemu.h" @@ -74,6 +75,9 @@ /* PMU SFR base address */ #define EXYNOS4210_PMU_BASE_ADDR 0x10020000 +/* Clock controller SFR base address */ +#define EXYNOS4210_CLK_BASE_ADDR 0x10030000 + /* Display controllers (FIMD) */ #define EXYNOS4210_FIMD0_BASE_ADDR 0x11C00000 @@ -138,6 +142,16 @@ void exynos4210_write_secondary(ARMCPU *cpu, info->smp_loader_start); } +static uint64_t exynos4210_calc_affinity(int cpu) +{ + uint64_t mp_affinity; + + /* Exynos4210 has 0x9 as cluster ID */ + mp_affinity = (0x9 << ARM_AFF1_SHIFT) | cpu; + + return mp_affinity; +} + Exynos4210State *exynos4210_init(MemoryRegion *system_mem, unsigned long ram_size) { @@ -163,6 +177,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem, } s->cpu[n] = ARM_CPU(cpuobj); + object_property_set_int(cpuobj, exynos4210_calc_affinity(n), + "mp-affinity", &error_abort); object_property_set_int(cpuobj, EXYNOS4210_SMP_PRIVATE_BASE_ADDR, "reset-cbar", &error_abort); object_property_set_bool(cpuobj, true, "realized", &error_fatal); @@ -297,6 +313,8 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem, */ sysbus_create_simple("exynos4210.pmu", EXYNOS4210_PMU_BASE_ADDR, NULL); + sysbus_create_simple("exynos4210.clk", EXYNOS4210_CLK_BASE_ADDR, NULL); + /* PWM */ sysbus_create_varargs("exynos4210.pwm", EXYNOS4210_PWM_BASE_ADDR, s->irq_table[exynos4210_get_irq(22, 0)], diff --git a/hw/arm/netduino2.c b/hw/arm/netduino2.c index 23d792837f..3cfe332dd1 100644 --- a/hw/arm/netduino2.c +++ b/hw/arm/netduino2.c @@ -27,17 +27,18 @@ #include "hw/boards.h" #include "qemu/error-report.h" #include "hw/arm/stm32f205_soc.h" +#include "hw/arm/arm.h" static void netduino2_init(MachineState *machine) { DeviceState *dev; dev = qdev_create(NULL, TYPE_STM32F205_SOC); - if (machine->kernel_filename) { - qdev_prop_set_string(dev, "kernel-filename", machine->kernel_filename); - } qdev_prop_set_string(dev, "cpu-model", "cortex-m3"); object_property_set_bool(OBJECT(dev), true, "realized", &error_fatal); + + armv7m_load_kernel(ARM_CPU(first_cpu), machine->kernel_filename, + FLASH_SIZE); } static void netduino2_machine_init(MachineClass *mc) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index 38425bda6c..6e1260d2ed 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -49,6 +49,9 @@ static void stm32f205_soc_initfn(Object *obj) STM32F205State *s = STM32F205_SOC(obj); int i; + 
object_initialize(&s->armv7m, sizeof(s->armv7m), TYPE_ARMV7M); + qdev_set_parent_bus(DEVICE(&s->armv7m), sysbus_get_default()); + object_initialize(&s->syscfg, sizeof(s->syscfg), TYPE_STM32F2XX_SYSCFG); qdev_set_parent_bus(DEVICE(&s->syscfg), sysbus_get_default()); @@ -82,7 +85,7 @@ static void stm32f205_soc_initfn(Object *obj) static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *dev, *nvic; + DeviceState *dev, *armv7m; SysBusDevice *busdev; Error *err = NULL; int i; @@ -110,8 +113,16 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) vmstate_register_ram_global(sram); memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, sram); - nvic = armv7m_init(get_system_memory(), FLASH_SIZE, 96, - s->kernel_filename, s->cpu_model); + armv7m = DEVICE(&s->armv7m); + qdev_prop_set_uint32(armv7m, "num-irq", 96); + qdev_prop_set_string(armv7m, "cpu-model", s->cpu_model); + object_property_set_link(OBJECT(&s->armv7m), OBJECT(get_system_memory()), + "memory", &error_abort); + object_property_set_bool(OBJECT(&s->armv7m), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } /* System configuration controller */ dev = DEVICE(&s->syscfg); @@ -122,7 +133,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, 0x40013800); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71)); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { @@ -136,7 +147,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, usart_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, usart_irq[i])); } /* Timer 2 to 5 */ @@ -150,7 +161,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, timer_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, timer_irq[i])); } /* ADC 1 to 3 */ @@ -162,7 +173,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) return; } qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0, - qdev_get_gpio_in(nvic, ADC_IRQ)); + qdev_get_gpio_in(armv7m, ADC_IRQ)); for (i = 0; i < STM_NUM_ADCS; i++) { dev = DEVICE(&(s->adc[i])); @@ -187,12 +198,11 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) } busdev = SYS_BUS_DEVICE(dev); sysbus_mmio_map(busdev, 0, spi_addr[i]); - sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i])); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m, spi_irq[i])); } } static Property stm32f205_soc_properties[] = { - DEFINE_PROP_STRING("kernel-filename", STM32F205State, kernel_filename), DEFINE_PROP_STRING("cpu-model", STM32F205State, cpu_model), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/arm/virt.c b/hw/arm/virt.c index f3440f2ccb..5f62a0321e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -535,7 +535,6 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic) static void create_gic(VirtMachineState *vms, qemu_irq *pic) { /* We create a standalone GIC */ - VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); DeviceState *gicdev; SysBusDevice *gicbusdev; const char *gictype; @@ 
-605,7 +604,7 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) fdt_add_gic_node(vms); - if (type == 3 && !vmc->no_its) { + if (type == 3 && vms->its) { create_its(vms, gicdev); } else if (type == 2) { create_v2m(vms, pic); @@ -1378,6 +1377,7 @@ static void machvirt_init(MachineState *machine) } object_property_set_bool(cpuobj, true, "realized", NULL); + object_unref(cpuobj); } fdt_add_timer_nodes(vms); fdt_add_cpu_nodes(vms); @@ -1480,6 +1480,20 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } +static bool virt_get_its(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->its; +} + +static void virt_set_its(Object *obj, bool value, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->its = value; +} + static char *virt_get_gic_version(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); @@ -1540,6 +1554,7 @@ type_init(machvirt_machine_init); static void virt_2_9_instance_init(Object *obj) { VirtMachineState *vms = VIRT_MACHINE(obj); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); /* EL3 is disabled by default on virt: this makes us consistent * between KVM and TCG for this board, and it also allows us to @@ -1579,6 +1594,19 @@ static void virt_2_9_instance_init(Object *obj) "Set GIC version. " "Valid values are 2, 3 and host", NULL); + if (vmc->no_its) { + vms->its = false; + } else { + /* Default allows ITS instantiation */ + vms->its = true; + object_property_add_bool(obj, "its", virt_get_its, + virt_set_its, NULL); + object_property_set_description(obj, "its", + "Set on/off to enable/disable " + "ITS instantiation", + NULL); + } + vms->memmap = a15memmap; vms->irqmap = a15irqmap; } diff --git a/hw/block/block.c b/hw/block/block.c index 8dc9d84a39..27878d0087 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -51,11 +51,33 @@ void blkconf_blocksizes(BlockConf *conf) } } -void blkconf_apply_backend_options(BlockConf *conf) +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp) { BlockBackend *blk = conf->blk; BlockdevOnError rerror, werror; + uint64_t perm, shared_perm; bool wce; + int ret; + + perm = BLK_PERM_CONSISTENT_READ; + if (!readonly) { + perm |= BLK_PERM_WRITE; + } + + shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_GRAPH_MOD; + if (resizable) { + shared_perm |= BLK_PERM_RESIZE; + } + if (conf->share_rw) { + shared_perm |= BLK_PERM_WRITE; + } + + ret = blk_set_perm(blk, perm, shared_perm, errp); + if (ret < 0) { + return; + } switch (conf->wce) { case ON_OFF_AUTO_ON: wce = true; break; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 17d29e7bc5..a328693d15 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -186,6 +186,7 @@ typedef enum FDiskFlags { struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; + BlockConf *conf; /* Drive status */ FloppyDriveType drive; /* CMOS drive type */ uint8_t perpendicular; /* 2.88 MB access mode */ @@ -469,9 +470,22 @@ static void fd_revalidate(FDrive *drv) } } -static void fd_change_cb(void *opaque, bool load) +static void fd_change_cb(void *opaque, bool load, Error **errp) { FDrive *drive = opaque; + Error *local_err = NULL; + + if (!load) { + blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort); + } else { + blkconf_apply_backend_options(drive->conf, + blk_is_read_only(drive->blk), false, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } drive->media_changed = 1; 
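/*
 * Illustrative note, not part of the patch: on eject (!load) the floppy
 * gives up all of its permissions (perm = 0) and shares everything
 * (BLK_PERM_ALL) so another user can take over the backend; on insert it
 * re-applies the configured permissions through
 * blkconf_apply_backend_options(). Assuming a writable, non-resizable drive
 * without share-rw=on, that call requests
 *     perm        = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE
 *     shared_perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
 *                   BLK_PERM_GRAPH_MOD
 * as computed by the hw/block/block.c hunk earlier in this diff.
 */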
drive->media_validated = false; @@ -508,6 +522,7 @@ static int floppy_drive_init(DeviceState *qdev) FloppyDrive *dev = FLOPPY_DRIVE(qdev); FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); FDrive *drive; + Error *local_err = NULL; int ret; if (dev->unit == -1) { @@ -533,7 +548,7 @@ static int floppy_drive_init(DeviceState *qdev) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } @@ -551,7 +566,13 @@ static int floppy_drive_init(DeviceState *qdev) * blkconf_apply_backend_options(). */ dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; - blkconf_apply_backend_options(&dev->conf); + + blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us * for empty drives. */ @@ -565,6 +586,7 @@ static int floppy_drive_init(DeviceState *qdev) return -1; } + drive->conf = &dev->conf; drive->blk = dev->conf.blk; drive->fdctrl = bus->fdc; diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 2d6eb46a04..190573cefa 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1215,6 +1215,7 @@ static void m25p80_realize(SSISlave *ss, Error **errp) { Flash *s = M25P80(ss); M25P80Class *mc = M25P80_GET_CLASS(s); + int ret; s->pi = mc->pi; @@ -1222,6 +1223,13 @@ static void m25p80_realize(SSISlave *ss, Error **errp) s->dirty_page = -1; if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->storage = blk_blockalign(s->blk, s->size); diff --git a/hw/block/nand.c b/hw/block/nand.c index c69e6755d9..0d33ac281f 100644 --- a/hw/block/nand.c +++ b/hw/block/nand.c @@ -373,6 +373,8 @@ static void nand_realize(DeviceState *dev, Error **errp) { int pagesize; NANDFlashState *s = NAND(dev); + int ret; + s->buswidth = nand_flash_ids[s->chip_id].width >> 3; s->size = nand_flash_ids[s->chip_id].size << 20; @@ -407,6 +409,11 @@ static void nand_realize(DeviceState *dev, Error **errp) error_setg(errp, "Can't use a read-only drive"); return; } + ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } if (blk_getlength(s->blk) >= (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { pagesize = 0; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ae91a18f17..ae303d44e5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -835,6 +835,7 @@ static int nvme_init(PCIDevice *pci_dev) int i; int64_t bs_size; uint8_t *pci_conf; + Error *local_err = NULL; if (!n->conf.blk) { return -1; @@ -850,7 +851,12 @@ static int nvme_init(PCIDevice *pci_dev) return -1; } blkconf_blocksizes(&n->conf); - blkconf_apply_backend_options(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } pci_conf = pci_dev->config; pci_conf[PCI_INTERRUPT_PIN] = 1; diff --git a/hw/block/onenand.c b/hw/block/onenand.c index 8d8422739e..ddf5492426 100644 --- a/hw/block/onenand.c +++ b/hw/block/onenand.c @@ -778,6 +778,7 @@ static int onenand_initfn(SysBusDevice *sbd) OneNANDState *s = ONE_NAND(dev); uint32_t size = 1 << (24 + 
((s->id.dev >> 4) & 7)); void *ram; + Error *local_err = NULL; s->base = (hwaddr)-1; s->rdy = NULL; @@ -796,6 +797,12 @@ static int onenand_initfn(SysBusDevice *sbd) error_report("Can't use a read-only drive"); return -1; } + blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } s->blk_cur = s->blk; } s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 71b98a3eef..594d4cf6fe 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -758,6 +758,18 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, total_len); @@ -768,12 +780,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) } } - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - /* Default to devices being used at their maximum device width. This was * assumed before the device_width support was added. */ diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c index ef71322759..e6c5c6c25d 100644 --- a/hw/block/pflash_cfi02.c +++ b/hw/block/pflash_cfi02.c @@ -632,6 +632,19 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); pfl->chip_len = chip_len; + + if (pfl->blk) { + uint64_t perm; + pfl->ro = blk_is_read_only(pfl->blk); + perm = BLK_PERM_CONSISTENT_READ | (pfl->ro ? 
0 : BLK_PERM_WRITE); + ret = blk_set_perm(pfl->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + } else { + pfl->ro = 0; + } + if (pfl->blk) { /* read the initial flash content */ ret = blk_pread(pfl->blk, 0, pfl->storage, chip_len); @@ -646,12 +659,6 @@ static void pflash_cfi02_realize(DeviceState *dev, Error **errp) pfl->rom_mode = 1; sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); - if (pfl->blk) { - pfl->ro = blk_is_read_only(pfl->blk); - } else { - pfl->ro = 0; - } - pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); pfl->wcycle = 0; pfl->cmd = 0; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 843bd2fa73..98c16a7a9a 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -928,7 +928,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) } blkconf_serial(&conf->conf, &conf->serial); - blkconf_apply_backend_options(&conf->conf); + blkconf_apply_backend_options(&conf->conf, + blk_is_read_only(conf->conf.blk), true, + &err); + if (err) { + error_propagate(errp, err); + return; + } s->original_wce = blk_enable_write_cache(conf->conf.blk); blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); if (err) { diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs index 7f8c9dc659..91450b2eab 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs @@ -13,6 +13,7 @@ common-obj-$(CONFIG_PTIMER) += ptimer.o common-obj-$(CONFIG_SOFTMMU) += sysbus.o common-obj-$(CONFIG_SOFTMMU) += machine.o common-obj-$(CONFIG_SOFTMMU) += loader.o +common-obj-$(CONFIG_FITLOADER) += loader-fit.o common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o common-obj-$(CONFIG_SOFTMMU) += register.o common-obj-$(CONFIG_SOFTMMU) += or-irq.o diff --git a/hw/core/bus.c b/hw/core/bus.c index cf383fc1af..4651f24486 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -197,7 +197,7 @@ static void qbus_initfn(Object *obj) TYPE_HOTPLUG_HANDLER, (Object **)&bus->hotplug_handler, object_property_allow_set_link, - OBJ_PROP_LINK_UNREF_ON_RELEASE, + 0, NULL); object_property_add_bool(obj, "realized", bus_get_realized, bus_set_realized, NULL); diff --git a/hw/core/irq.c b/hw/core/irq.c index 49ff2e64fe..b98d1d69f5 100644 --- a/hw/core/irq.c +++ b/hw/core/irq.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "qemu-common.h" #include "hw/irq.h" #include "qom/object.h" diff --git a/hw/core/loader-fit.c b/hw/core/loader-fit.c new file mode 100644 index 0000000000..0c4a7207f4 --- /dev/null +++ b/hw/core/loader-fit.c @@ -0,0 +1,325 @@ +/* + * Flattened Image Tree loader. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "exec/address-spaces.h" +#include "exec/memory.h" +#include "hw/loader.h" +#include "hw/loader-fit.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" +#include "sysemu/device_tree.h" +#include "sysemu/sysemu.h" + +#include <libfdt.h> +#include <zlib.h> + +#define FIT_LOADER_MAX_PATH (128) + +static const void *fit_load_image_alloc(const void *itb, const char *name, + int *poff, size_t *psz) +{ + const void *data; + const char *comp; + void *uncomp_data; + char path[FIT_LOADER_MAX_PATH]; + int off, sz; + ssize_t uncomp_len; + + snprintf(path, sizeof(path), "/images/%s", name); + + off = fdt_path_offset(itb, path); + if (off < 0) { + return NULL; + } + if (poff) { + *poff = off; + } + + data = fdt_getprop(itb, off, "data", &sz); + if (!data) { + return NULL; + } + + comp = fdt_getprop(itb, off, "compression", NULL); + if (!comp || !strcmp(comp, "none")) { + if (psz) { + *psz = sz; + } + uncomp_data = g_malloc(sz); + memmove(uncomp_data, data, sz); + return uncomp_data; + } + + if (!strcmp(comp, "gzip")) { + uncomp_len = UBOOT_MAX_GUNZIP_BYTES; + uncomp_data = g_malloc(uncomp_len); + + uncomp_len = gunzip(uncomp_data, uncomp_len, (void *) data, sz); + if (uncomp_len < 0) { + error_printf("unable to decompress %s image\n", name); + g_free(uncomp_data); + return NULL; + } + + data = g_realloc(uncomp_data, uncomp_len); + if (psz) { + *psz = uncomp_len; + } + return data; + } + + error_printf("unknown compression '%s'\n", comp); + return NULL; +} + +static int fit_image_addr(const void *itb, int img, const char *name, + hwaddr *addr) +{ + const void *prop; + int len; + + prop = fdt_getprop(itb, img, name, &len); + if (!prop) { + return -ENOENT; + } + + switch (len) { + case 4: + *addr = fdt32_to_cpu(*(fdt32_t *)prop); + return 0; + case 8: + *addr = fdt64_to_cpu(*(fdt64_t *)prop); + return 0; + default: + error_printf("invalid %s address length %d\n", name, len); + return -EINVAL; + } +} + +static int fit_load_kernel(const struct fit_loader *ldr, const void *itb, + int cfg, void *opaque, hwaddr *pend) +{ + const char *name; + const void *data; + const void *load_data; + hwaddr load_addr, entry_addr; + int img_off, err; + size_t sz; + int ret; + + name = fdt_getprop(itb, cfg, "kernel", NULL); + if (!name) { + error_printf("no kernel specified by FIT configuration\n"); + return -EINVAL; + } + + load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz); + if (!data) { + error_printf("unable to load kernel image from FIT\n"); + return -EINVAL; + } + + err = fit_image_addr(itb, img_off, "load", &load_addr); + if (err) { + error_printf("unable to read kernel load address from FIT\n"); + ret = err; + goto out; + } + + err = fit_image_addr(itb, img_off, "entry", &entry_addr); + if (err) { + error_printf("unable to read kernel entry address from FIT\n"); + ret = err; + goto out; + } + + if (ldr->kernel_filter) { + load_data = ldr->kernel_filter(opaque, data, &load_addr, &entry_addr); + } + + if (pend) { + *pend = load_addr + sz; + } + + load_addr = ldr->addr_to_phys(opaque, load_addr); + rom_add_blob_fixed(name, load_data, sz, load_addr); + + ret = 0; +out: + g_free((void *) data); + if (data != load_data) { + g_free((void *) load_data); + } + return ret; +} + +static int fit_load_fdt(const struct fit_loader *ldr, const void *itb, + int cfg, void *opaque, const void *match_data, + hwaddr kernel_end) +{ + const char *name; + const void *data; + const void *load_data; + hwaddr load_addr; + int img_off, err; + size_t sz; + int ret; + + name = 
fdt_getprop(itb, cfg, "fdt", NULL); + if (!name) { + return 0; + } + + load_data = data = fit_load_image_alloc(itb, name, &img_off, &sz); + if (!data) { + error_printf("unable to load FDT image from FIT\n"); + return -EINVAL; + } + + err = fit_image_addr(itb, img_off, "load", &load_addr); + if (err == -ENOENT) { + load_addr = ROUND_UP(kernel_end, 64 * K_BYTE) + (10 * M_BYTE); + } else if (err) { + ret = err; + goto out; + } + + if (ldr->fdt_filter) { + load_data = ldr->fdt_filter(opaque, data, match_data, &load_addr); + } + + load_addr = ldr->addr_to_phys(opaque, load_addr); + sz = fdt_totalsize(load_data); + rom_add_blob_fixed(name, load_data, sz, load_addr); + + ret = 0; +out: + g_free((void *) data); + if (data != load_data) { + g_free((void *) load_data); + } + return ret; +} + +static bool fit_cfg_compatible(const void *itb, int cfg, const char *compat) +{ + const void *fdt; + const char *fdt_name; + bool ret; + + fdt_name = fdt_getprop(itb, cfg, "fdt", NULL); + if (!fdt_name) { + return false; + } + + fdt = fit_load_image_alloc(itb, fdt_name, NULL, NULL); + if (!fdt) { + return false; + } + + if (fdt_check_header(fdt)) { + ret = false; + goto out; + } + + if (fdt_node_check_compatible(fdt, 0, compat)) { + ret = false; + goto out; + } + + ret = true; +out: + g_free((void *) fdt); + return ret; +} + +int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque) +{ + const struct fit_loader_match *match; + const void *itb, *match_data = NULL; + const char *def_cfg_name; + char path[FIT_LOADER_MAX_PATH]; + int itb_size, configs, cfg_off, off, err; + hwaddr kernel_end; + int ret; + + itb = load_device_tree(filename, &itb_size); + if (!itb) { + return -EINVAL; + } + + configs = fdt_path_offset(itb, "/configurations"); + if (configs < 0) { + ret = configs; + goto out; + } + + cfg_off = -FDT_ERR_NOTFOUND; + + if (ldr->matches) { + for (match = ldr->matches; match->compatible; match++) { + off = fdt_first_subnode(itb, configs); + while (off >= 0) { + if (fit_cfg_compatible(itb, off, match->compatible)) { + cfg_off = off; + match_data = match->data; + break; + } + + off = fdt_next_subnode(itb, off); + } + + if (cfg_off >= 0) { + break; + } + } + } + + if (cfg_off < 0) { + def_cfg_name = fdt_getprop(itb, configs, "default", NULL); + if (def_cfg_name) { + snprintf(path, sizeof(path), "/configurations/%s", def_cfg_name); + cfg_off = fdt_path_offset(itb, path); + } + } + + if (cfg_off < 0) { + /* couldn't find a configuration to use */ + ret = cfg_off; + goto out; + } + + err = fit_load_kernel(ldr, itb, cfg_off, opaque, &kernel_end); + if (err) { + ret = err; + goto out; + } + + err = fit_load_fdt(ldr, itb, cfg_off, opaque, match_data, kernel_end); + if (err) { + ret = err; + goto out; + } + + ret = 0; +out: + g_free((void *) itb); + return ret; +} diff --git a/hw/core/loader.c b/hw/core/loader.c index ee5abd6eb7..bf17b42cbe 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -435,6 +435,19 @@ int load_elf_as(const char *filename, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab, AddressSpace *as) { + return load_elf_ram(filename, translate_fn, translate_opaque, + pentry, lowaddr, highaddr, big_endian, elf_machine, + clear_lsb, data_swab, as, true); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, 
int data_swab, AddressSpace *as, + bool load_rom) +{ int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -473,11 +486,11 @@ int load_elf_as(const char *filename, if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab, as); + data_swab, as, load_rom); } fail: @@ -527,12 +540,7 @@ static void zfree(void *x, void *addr) #define DEFLATED 8 -/* This is the usual maximum in uboot, so if a uImage overflows this, it would - * overflow on real hardware too. */ -#define UBOOT_MAX_GUNZIP_BYTES (64 << 20) - -static ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, - size_t srclen) +ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen) { z_stream s; ssize_t dstbytes; diff --git a/hw/core/machine.c b/hw/core/machine.c index b0fd91f6cd..0699750336 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -357,6 +357,37 @@ static void machine_init_notify(Notifier *notifier, void *data) foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL); } +HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) +{ + int i; + Object *cpu; + HotpluggableCPUList *head = NULL; + const char *cpu_type; + + cpu = machine->possible_cpus->cpus[0].cpu; + assert(cpu); /* Boot cpu is always present */ + cpu_type = object_get_typename(cpu); + for (i = 0; i < machine->possible_cpus->len; i++) { + HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1); + HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); + + cpu_item->type = g_strdup(cpu_type); + cpu_item->vcpus_count = machine->possible_cpus->cpus[i].vcpus_count; + cpu_item->props = g_memdup(&machine->possible_cpus->cpus[i].props, + sizeof(*cpu_item->props)); + + cpu = machine->possible_cpus->cpus[i].cpu; + if (cpu) { + cpu_item->has_qom_path = true; + cpu_item->qom_path = object_get_canonical_path(cpu); + } + list_item->value = cpu_item; + list_item->next = head; + head = list_item; + } + return head; +} + static void machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c index 1ac090d1a4..1485d5b285 100644 --- a/hw/core/or-irq.c +++ b/hw/core/or-irq.c @@ -89,6 +89,9 @@ static void or_irq_class_init(ObjectClass *klass, void *data) dc->props = or_irq_properties; dc->realize = or_irq_realize; dc->vmsd = &vmstate_or_irq; + + /* Reason: Needs to be wired up to work, e.g. 
see stm32f205_soc.c */ + dc->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo or_irq_type_info = { diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index 3af82afe78..59ccb00550 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -12,6 +12,7 @@ #include "qemu/host-utils.h" #include "sysemu/replay.h" #include "sysemu/qtest.h" +#include "block/aio.h" #define DELTA_ADJUST 1 #define DELTA_NO_ADJUST -1 @@ -353,3 +354,10 @@ ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask) s->policy_mask = policy_mask; return s; } + +void ptimer_free(ptimer_state *s) +{ + qemu_bh_delete(s->bh); + timer_free(s->timer); + g_free(s); +} diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 94f4d8bde4..c34be1c1ba 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -73,14 +73,19 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, { BlockBackend *blk; bool blk_created = false; + int ret; blk = blk_by_name(str); if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(); - blk_insert_bs(blk, bs); + blk = blk_new(0, BLK_PERM_ALL); blk_created = true; + + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } } } if (!blk) { diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 06ba02e2a3..1e7fb33246 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -37,6 +37,7 @@ #include "hw/boards.h" #include "hw/sysbus.h" #include "qapi-event.h" +#include "migration/migration.h" int qdev_hotplug = 0; static bool qdev_hot_added = false; @@ -102,9 +103,23 @@ static void bus_add_child(BusState *bus, DeviceState *child) void qdev_set_parent_bus(DeviceState *dev, BusState *bus) { + bool replugging = dev->parent_bus != NULL; + + if (replugging) { + /* Keep a reference to the device while it's not plugged into + * any bus, to avoid it potentially evaporating when it is + * dereffed in bus_remove_child(). + */ + object_ref(OBJECT(dev)); + bus_remove_child(dev->parent_bus, dev); + object_unref(OBJECT(dev->parent_bus)); + } dev->parent_bus = bus; object_ref(OBJECT(bus)); bus_add_child(bus, dev); + if (replugging) { + object_unref(OBJECT(dev)); + } } /* Create a new device. 
This only initializes the device state @@ -889,6 +904,7 @@ static void device_set_realized(Object *obj, bool value, Error **errp) Error *local_err = NULL; bool unattached_parent = false; static int unattached_count; + int ret; if (dev->hotplugged && !dc->hotpluggable) { error_setg(errp, QERR_DEVICE_NO_HOTPLUG, object_get_typename(obj)); @@ -896,6 +912,11 @@ static void device_set_realized(Object *obj, bool value, Error **errp) } if (value && !dev->realized) { + ret = check_migratable(obj, &local_err); + if (ret < 0) { + goto fail; + } + if (!obj->parent) { gchar *name = g_strdup_printf("device[%d]", unattached_count++); diff --git a/hw/core/register.c b/hw/core/register.c index 4bfbc508de..dc335a79a9 100644 --- a/hw/core/register.c +++ b/hw/core/register.c @@ -59,6 +59,15 @@ static inline uint64_t register_read_val(RegisterInfo *reg) return 0; /* unreachable */ } +static inline uint64_t register_enabled_mask(int data_size, unsigned size) +{ + if (data_size < size) { + size = data_size; + } + + return MAKE_64BIT_MASK(0, size * 8); +} + void register_write(RegisterInfo *reg, uint64_t val, uint64_t we, const char *prefix, bool debug) { @@ -192,11 +201,7 @@ void register_write_memory(void *opaque, hwaddr addr, } /* Generate appropriate write enable mask */ - if (reg->data_size < size) { - we = MAKE_64BIT_MASK(0, reg->data_size * 8); - } else { - we = MAKE_64BIT_MASK(0, size * 8); - } + we = register_enabled_mask(reg->data_size, size); register_write(reg, value, we, reg_array->prefix, reg_array->debug); @@ -208,6 +213,7 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, RegisterInfoArray *reg_array = opaque; RegisterInfo *reg = NULL; uint64_t read_val; + uint64_t re; int i; for (i = 0; i < reg_array->num_elements; i++) { @@ -223,7 +229,10 @@ uint64_t register_read_memory(void *opaque, hwaddr addr, return 0; } - read_val = register_read(reg, size * 8, reg_array->prefix, + /* Generate appropriate read enable mask */ + re = register_enabled_mask(reg->data_size, size); + + read_val = register_read(reg, re, reg_array->prefix, reg_array->debug); return extract64(read_val, 0, size * 8); @@ -274,9 +283,18 @@ void register_finalize_block(RegisterInfoArray *r_array) g_free(r_array); } +static void register_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + /* Reason: needs to be wired up to work */ + dc->cannot_instantiate_with_device_add_yet = true; +} + static const TypeInfo register_info = { .name = TYPE_REGISTER, .parent = TYPE_DEVICE, + .class_init = register_class_init, }; static void register_register_types(void) diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c index 1deb52070a..b9e7cb1df1 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -900,6 +900,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s) { int w; + if (blit_is_unsafe(s, true)) { + return 0; + } + s->cirrus_blt_mode &= ~CIRRUS_BLTMODE_MEMSYSSRC; s->cirrus_srcptr = &s->cirrus_bltbuf[0]; s->cirrus_srcptr_end = &s->cirrus_bltbuf[0]; @@ -925,6 +929,10 @@ static int cirrus_bitblt_cputovideo(CirrusVGAState * s) } s->cirrus_srccounter = s->cirrus_blt_srcpitch * s->cirrus_blt_height; } + + /* the blit_is_unsafe call above should catch this */ + assert(s->cirrus_blt_srcpitch <= CIRRUS_BLTBUFSIZE); + s->cirrus_srcptr = s->cirrus_bltbuf; s->cirrus_srcptr_end = s->cirrus_bltbuf + s->cirrus_blt_srcpitch; cirrus_update_memory_access(s); diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c index 7528665510..59120ddb67 100644 --- 
a/hw/display/milkymist-tmu2.c +++ b/hw/display/milkymist-tmu2.c @@ -293,7 +293,7 @@ static void tmu2_start(MilkymistTMU2State *s) cpu_physical_memory_unmap(mesh, mesh_len, 0, mesh_len); /* Write back the OpenGL framebuffer to the QEMU framebuffer */ - fb_len = 2 * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; + fb_len = 2ULL * s->regs[R_DSTHRES] * s->regs[R_DSTVRES]; fb = cpu_physical_memory_map(s->regs[R_DSTFBUF], &fb_len, 1); if (fb == NULL) { glDeleteTextures(1, &texture); diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c index ecb09d17a1..f49b7fe8cd 100644 --- a/hw/display/virtio-gpu-3d.c +++ b/hw/display/virtio-gpu-3d.c @@ -177,16 +177,15 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, qemu_console_resize(g->scanout[ss.scanout_id].con, ss.r.width, ss.r.height); virgl_renderer_force_ctx_0(); - dpy_gl_scanout(g->scanout[ss.scanout_id].con, info.tex_id, - info.flags & 1 /* FIXME: Y_0_TOP */, - info.width, info.height, - ss.r.x, ss.r.y, ss.r.width, ss.r.height); + dpy_gl_scanout_texture(g->scanout[ss.scanout_id].con, info.tex_id, + info.flags & 1 /* FIXME: Y_0_TOP */, + info.width, info.height, + ss.r.x, ss.r.y, ss.r.width, ss.r.height); } else { if (ss.scanout_id != 0) { dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL); } - dpy_gl_scanout(g->scanout[ss.scanout_id].con, 0, false, - 0, 0, 0, 0, 0, 0); + dpy_gl_scanout_disable(g->scanout[ss.scanout_id].con); } g->scanout[ss.scanout_id].resource_id = ss.resource_id; } @@ -597,7 +596,7 @@ void virtio_gpu_virgl_reset(VirtIOGPU *g) if (i != 0) { dpy_gfx_replace_surface(g->scanout[i].con, NULL); } - dpy_gl_scanout(g->scanout[i].con, 0, false, 0, 0, 0, 0, 0, 0); + dpy_gl_scanout_disable(g->scanout[i].con); } } diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs index a43c7cf442..fa0a72e6d0 100644 --- a/hw/gpio/Makefile.objs +++ b/hw/gpio/Makefile.objs @@ -7,3 +7,4 @@ common-obj-$(CONFIG_GPIO_KEY) += gpio_key.o obj-$(CONFIG_OMAP) += omap_gpio.o obj-$(CONFIG_IMX) += imx_gpio.o +obj-$(CONFIG_RASPI) += bcm2835_gpio.o diff --git a/hw/gpio/bcm2835_gpio.c b/hw/gpio/bcm2835_gpio.c new file mode 100644 index 0000000000..acc2e3cf9e --- /dev/null +++ b/hw/gpio/bcm2835_gpio.c @@ -0,0 +1,353 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/timer.h" +#include "qapi/error.h" +#include "hw/sysbus.h" +#include "hw/sd/sd.h" +#include "hw/gpio/bcm2835_gpio.h" + +#define GPFSEL0 0x00 +#define GPFSEL1 0x04 +#define GPFSEL2 0x08 +#define GPFSEL3 0x0C +#define GPFSEL4 0x10 +#define GPFSEL5 0x14 +#define GPSET0 0x1C +#define GPSET1 0x20 +#define GPCLR0 0x28 +#define GPCLR1 0x2C +#define GPLEV0 0x34 +#define GPLEV1 0x38 +#define GPEDS0 0x40 +#define GPEDS1 0x44 +#define GPREN0 0x4C +#define GPREN1 0x50 +#define GPFEN0 0x58 +#define GPFEN1 0x5C +#define GPHEN0 0x64 +#define GPHEN1 0x68 +#define GPLEN0 0x70 +#define GPLEN1 0x74 +#define GPAREN0 0x7C +#define GPAREN1 0x80 +#define GPAFEN0 0x88 +#define GPAFEN1 0x8C +#define GPPUD 0x94 +#define GPPUDCLK0 0x98 +#define GPPUDCLK1 0x9C + +static uint32_t gpfsel_get(BCM2835GpioState *s, uint8_t reg) +{ + int i; + uint32_t value = 0; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + value |= (s->fsel[index] & 0x7) << (3 * i); + } + } + return value; +} + +static void gpfsel_set(BCM2835GpioState *s, uint8_t reg, uint32_t value) +{ + int i; + for (i = 0; i < 10; i++) { + uint32_t index = 10 * reg + i; + if (index < sizeof(s->fsel)) { + int fsel = (value >> (3 * i)) & 0x7; + s->fsel[index] = fsel; + } + } + + /* SD controller selection (48-53) */ + if (s->sd_fsel != 0 + && (s->fsel[48] == 0) /* SD_CLK_R */ + && (s->fsel[49] == 0) /* SD_CMD_R */ + && (s->fsel[50] == 0) /* SD_DATA0_R */ + && (s->fsel[51] == 0) /* SD_DATA1_R */ + && (s->fsel[52] == 0) /* SD_DATA2_R */ + && (s->fsel[53] == 0) /* SD_DATA3_R */ + ) { + /* SDHCI controller selected */ + sdbus_reparent_card(s->sdbus_sdhost, s->sdbus_sdhci); + s->sd_fsel = 0; + } else if (s->sd_fsel != 4 + && (s->fsel[48] == 4) /* SD_CLK_R */ + && (s->fsel[49] == 4) /* SD_CMD_R */ + && (s->fsel[50] == 4) /* SD_DATA0_R */ + && (s->fsel[51] == 4) /* SD_DATA1_R */ + && (s->fsel[52] == 4) /* SD_DATA2_R */ + && (s->fsel[53] == 4) /* SD_DATA3_R */ + ) { + /* SDHost controller selected */ + sdbus_reparent_card(s->sdbus_sdhci, s->sdbus_sdhost); + s->sd_fsel = 4; + } +} + +static int gpfsel_is_out(BCM2835GpioState *s, int index) +{ + if (index >= 0 && index < 54) { + return s->fsel[index] == 1; + } + return 0; +} + +static void gpset(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & ~*lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 1); + } + cur <<= 1; + } + + *lev |= val; +} + +static void gpclr(BCM2835GpioState *s, + uint32_t val, uint8_t start, uint8_t count, uint32_t *lev) +{ + uint32_t changes = val & *lev; + uint32_t cur = 1; + + int i; + for (i = 0; i < count; i++) { + if ((changes & cur) && (gpfsel_is_out(s, start + i))) { + qemu_set_irq(s->out[start + i], 0); + } + cur <<= 1; + } + + *lev &= ~val; +} + +static uint64_t bcm2835_gpio_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + return gpfsel_get(s, offset / 4); + case GPSET0: + case GPSET1: + /* Write Only */ + return 0; + case GPCLR0: + case GPCLR1: + /* Write Only */ + return 0; + case GPLEV0: + return s->lev0; + case GPLEV1: + return s->lev1; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case 
GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + return 0; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } + + return 0; +} + +static void bcm2835_gpio_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835GpioState *s = (BCM2835GpioState *)opaque; + + switch (offset) { + case GPFSEL0: + case GPFSEL1: + case GPFSEL2: + case GPFSEL3: + case GPFSEL4: + case GPFSEL5: + gpfsel_set(s, offset / 4, value); + break; + case GPSET0: + gpset(s, value, 0, 32, &s->lev0); + break; + case GPSET1: + gpset(s, value, 32, 22, &s->lev1); + break; + case GPCLR0: + gpclr(s, value, 0, 32, &s->lev0); + break; + case GPCLR1: + gpclr(s, value, 32, 22, &s->lev1); + break; + case GPLEV0: + case GPLEV1: + /* Read Only */ + break; + case GPEDS0: + case GPEDS1: + case GPREN0: + case GPREN1: + case GPFEN0: + case GPFEN1: + case GPHEN0: + case GPHEN1: + case GPLEN0: + case GPLEN1: + case GPAREN0: + case GPAREN1: + case GPAFEN0: + case GPAFEN1: + case GPPUD: + case GPPUDCLK0: + case GPPUDCLK1: + /* Not implemented */ + break; + default: + goto err_out; + } + return; + +err_out: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); +} + +static void bcm2835_gpio_reset(DeviceState *dev) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + + int i; + for (i = 0; i < 6; i++) { + gpfsel_set(s, i, 0); + } + + s->sd_fsel = 0; + + /* SDHCI is selected by default */ + sdbus_reparent_card(&s->sdbus, s->sdbus_sdhci); + + s->lev0 = 0; + s->lev1 = 0; +} + +static const MemoryRegionOps bcm2835_gpio_ops = { + .read = bcm2835_gpio_read, + .write = bcm2835_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_gpio = { + .name = "bcm2835_gpio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8_ARRAY(fsel, BCM2835GpioState, 54), + VMSTATE_UINT32(lev0, BCM2835GpioState), + VMSTATE_UINT32(lev1, BCM2835GpioState), + VMSTATE_UINT8(sd_fsel, BCM2835GpioState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_gpio_init(Object *obj) +{ + BCM2835GpioState *s = BCM2835_GPIO(obj); + DeviceState *dev = DEVICE(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_SD_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, + &bcm2835_gpio_ops, s, "bcm2835_gpio", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + qdev_init_gpio_out(dev, s->out, 54); +} + +static void bcm2835_gpio_realize(DeviceState *dev, Error **errp) +{ + BCM2835GpioState *s = BCM2835_GPIO(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhci", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhci link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhci = SD_BUS(obj); + + obj = object_property_get_link(OBJECT(dev), "sdbus-sdhost", &err); + if (obj == NULL) { + error_setg(errp, "%s: required sdhost link not found: %s", + __func__, error_get_pretty(err)); + return; + } + s->sdbus_sdhost = SD_BUS(obj); +} + +static void bcm2835_gpio_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_bcm2835_gpio; + dc->realize = &bcm2835_gpio_realize; + dc->reset = &bcm2835_gpio_reset; +} + +static const TypeInfo bcm2835_gpio_info = { + 
.name = TYPE_BCM2835_GPIO, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835GpioState), + .instance_init = bcm2835_gpio_init, + .class_init = bcm2835_gpio_class_init, +}; + +static void bcm2835_gpio_register_types(void) +{ + type_register_static(&bcm2835_gpio_info); +} + +type_init(bcm2835_gpio_register_types) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1c928abb28..2073108577 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -42,6 +42,7 @@ #include "hw/acpi/memory_hotplug.h" #include "sysemu/tpm.h" #include "hw/acpi/tpm.h" +#include "hw/acpi/vmgenid.h" #include "sysemu/tpm_backend.h" #include "hw/timer/mc146818rtc_regs.h" #include "sysemu/numa.h" @@ -462,7 +463,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) *bus_bsel = (*bsel_alloc)++; object_property_add_uint32_ptr(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, - bus_bsel, NULL); + bus_bsel, &error_abort); } return bsel_alloc; @@ -471,7 +472,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) static void acpi_set_pci_info(void) { PCIBus *bus = find_i440fx(); /* TODO: Q35 support */ - unsigned bsel_alloc = 0; + unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT; if (bus) { /* Scan all PCI buses. Set property to enable acpi based hotplug. */ @@ -1803,7 +1804,7 @@ static Aml *build_q35_osc_method(void) Aml *else_ctx; Aml *method; Aml *a_cwd1 = aml_name("CDW1"); - Aml *a_ctrl = aml_name("CTRL"); + Aml *a_ctrl = aml_local(0); method = aml_method("_OSC", 4, AML_NOTSERIALIZED); aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); @@ -1813,7 +1814,6 @@ static Aml *build_q35_osc_method(void) aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); - aml_append(if_ctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl)); /* @@ -1898,8 +1898,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_ADR", aml_int(0))); aml_append(dev, aml_name_decl("_UID", aml_int(1))); - aml_append(dev, aml_name_decl("SUPP", aml_int(0))); - aml_append(dev, aml_name_decl("CTRL", aml_int(0))); aml_append(dev, build_q35_osc_method()); aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); @@ -1964,6 +1962,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + if (pci_bus_is_express(bus)) { + aml_append(dev, build_q35_osc_method()); + } if (numa_node != NUMA_NODE_UNASSIGNED) { aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); @@ -2610,6 +2611,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) size_t aml_len = 0; GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; + Object *vmgenid_dev; acpi_get_pm_info(&pm); acpi_get_misc_info(&misc); @@ -2653,6 +2655,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, pcms); + vmgenid_dev = find_vmgenid_dev(); + if (vmgenid_dev) { + acpi_add_table(table_offsets, tables_blob); + vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob, + tables->vmgenid, tables->linker); + } + if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); build_hpet(tables_blob, 
tables->linker); @@ -2823,6 +2832,7 @@ void acpi_setup(void) PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); AcpiBuildTables tables; AcpiBuildState *build_state; + Object *vmgenid_dev; if (!pcms->fw_cfg) { ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n"); @@ -2859,6 +2869,12 @@ void acpi_setup(void) fw_cfg_add_file(pcms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data, acpi_data_len(tables.tcpalog)); + vmgenid_dev = find_vmgenid_dev(); + if (vmgenid_dev) { + vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg, + tables.vmgenid); + } + if (!pcmc->rsdp_in_ram) { /* * Keep for compatibility with old machine types. diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index 7135633863..82a49556af 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -457,8 +457,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) resume_all_vcpus(); if (!kvm_enabled()) { - /* tb_lock will be reset when cpu_loop_exit_noexc longjmps - * back into the cpu_exec loop. */ + /* Both tb_lock and iothread_mutex will be reset when + * longjmps back into the cpu_exec loop. */ tb_lock(); tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1); cpu_loop_exit_noexc(cs); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 60b0946be3..d24388e05f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -707,7 +707,8 @@ static void pc_build_smbios(PCMachineState *pcms) size_t smbios_tables_len, smbios_anchor_len; struct smbios_phys_mem_area *mem_array; unsigned i, array_count; - X86CPU *cpu = X86_CPU(pcms->possible_cpus->cpus[0].cpu); + MachineState *ms = MACHINE(pcms); + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); /* tell smbios about cpuid version and features */ smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); @@ -1111,7 +1112,7 @@ static void pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) void pc_hot_add_cpu(const int64_t id, Error **errp) { ObjectClass *oc; - PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(qdev_get_machine()); int64_t apic_id = x86_cpu_apic_id_from_index(id); Error *local_err = NULL; @@ -1127,8 +1128,8 @@ void pc_hot_add_cpu(const int64_t id, Error **errp) return; } - assert(pcms->possible_cpus->cpus[0].cpu); /* BSP is always present */ - oc = OBJECT_CLASS(CPU_GET_CLASS(pcms->possible_cpus->cpus[0].cpu)); + assert(ms->possible_cpus->cpus[0].cpu); /* BSP is always present */ + oc = OBJECT_CLASS(CPU_GET_CLASS(ms->possible_cpus->cpus[0].cpu)); pc_new_cpu(object_class_get_name(oc), apic_id, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1143,7 +1144,9 @@ void pc_cpus_init(PCMachineState *pcms) ObjectClass *oc; const char *typename; gchar **model_pieces; + const CPUArchIdList *possible_cpus; MachineState *machine = MACHINE(pcms); + MachineClass *mc = MACHINE_GET_CLASS(pcms); /* init CPUs */ if (machine->cpu_model == NULL) { @@ -1178,20 +1181,16 @@ void pc_cpus_init(PCMachineState *pcms) * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). 
*/ pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; - pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + - sizeof(CPUArchId) * max_cpus); - for (i = 0; i < max_cpus; i++) { - pcms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); - pcms->possible_cpus->len++; - if (i < smp_cpus) { - pc_new_cpu(typename, x86_cpu_apic_id_from_index(i), &error_fatal); - } + possible_cpus = mc->possible_cpu_arch_ids(machine); + for (i = 0; i < smp_cpus; i++) { + pc_new_cpu(typename, possible_cpus->cpus[i].arch_id, &error_fatal); } } static void pc_build_feature_control_file(PCMachineState *pcms) { - X86CPU *cpu = X86_CPU(pcms->possible_cpus->cpus[0].cpu); + MachineState *ms = MACHINE(pcms); + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); CPUX86State *env = &cpu->env; uint32_t unused, ecx, edx; uint64_t feature_control_bits = 0; @@ -1787,21 +1786,19 @@ static int pc_apic_cmp(const void *a, const void *b) } /* returns pointer to CPUArchId descriptor that matches CPU's apic_id - * in pcms->possible_cpus->cpus, if pcms->possible_cpus->cpus has no + * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no * entry corresponding to CPU's apic_id returns NULL. */ -static CPUArchId *pc_find_cpu_slot(PCMachineState *pcms, CPUState *cpu, - int *idx) +static CPUArchId *pc_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) { - CPUClass *cc = CPU_GET_CLASS(cpu); CPUArchId apic_id, *found_cpu; - apic_id.arch_id = cc->get_arch_id(CPU(cpu)); - found_cpu = bsearch(&apic_id, pcms->possible_cpus->cpus, - pcms->possible_cpus->len, sizeof(*pcms->possible_cpus->cpus), + apic_id.arch_id = id; + found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, + ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), pc_apic_cmp); if (found_cpu && idx) { - *idx = found_cpu - pcms->possible_cpus->cpus; + *idx = found_cpu - ms->possible_cpus->cpus; } return found_cpu; } @@ -1812,6 +1809,7 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, CPUArchId *found_cpu; HotplugHandlerClass *hhc; Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); if (pcms->acpi_dev) { @@ -1831,8 +1829,8 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); } - found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL); - found_cpu->cpu = CPU(dev); + found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); + found_cpu->cpu = OBJECT(dev); out: error_propagate(errp, local_err); } @@ -1842,9 +1840,10 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, int idx = -1; HotplugHandlerClass *hhc; Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); - pc_find_cpu_slot(pcms, CPU(dev), &idx); + pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); assert(idx != -1); if (idx == 0) { error_setg(&local_err, "Boot CPU is unpluggable"); @@ -1869,6 +1868,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, CPUArchId *found_cpu; HotplugHandlerClass *hhc; Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); @@ -1878,7 +1878,7 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, goto out; } - found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL); + found_cpu = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, NULL); found_cpu->cpu = NULL; object_unparent(OBJECT(dev)); @@ -1936,13 +1936,15 @@ static void pc_cpu_pre_plug(HotplugHandler 
*hotplug_dev, cpu->apic_id = apicid_from_topo_ids(smp_cores, smp_threads, &topo); } - cpu_slot = pc_find_cpu_slot(pcms, CPU(dev), &idx); + cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); if (!cpu_slot) { + MachineState *ms = MACHINE(pcms); + x86_topo_ids_from_apicid(cpu->apic_id, smp_cores, smp_threads, &topo); error_setg(errp, "Invalid CPU [socket: %u, core: %u, thread: %u] with" " APIC ID %" PRIu32 ", valid index range 0:%d", topo.pkg_id, topo.core_id, topo.smt_id, cpu->apic_id, - pcms->possible_cpus->len - 1); + ms->possible_cpus->len - 1); return; } @@ -1953,7 +1955,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, } /* if 'address' properties socket-id/core-id/thread-id are not set, set them - * so that query_hotpluggable_cpus would show correct values + * so that machine_query_hotpluggable_cpus would show correct values */ /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() * once -smp refactoring is complete and there will be CPU private @@ -2251,55 +2253,37 @@ static unsigned pc_cpu_index_to_socket_id(unsigned cpu_index) return topo.pkg_id; } -static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *machine) -{ - PCMachineState *pcms = PC_MACHINE(machine); - assert(pcms->possible_cpus); - return pcms->possible_cpus; -} - -static HotpluggableCPUList *pc_query_hotpluggable_cpus(MachineState *machine) +static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) { int i; - CPUState *cpu; - HotpluggableCPUList *head = NULL; - PCMachineState *pcms = PC_MACHINE(machine); - const char *cpu_type; - cpu = pcms->possible_cpus->cpus[0].cpu; - assert(cpu); /* BSP is always present */ - cpu_type = object_class_get_name(OBJECT_CLASS(CPU_GET_CLASS(cpu))); + if (ms->possible_cpus) { + /* + * make sure that max_cpus hasn't changed since the first use, i.e. 
+ * -smp hasn't been parsed after it + */ + assert(ms->possible_cpus->len == max_cpus); + return ms->possible_cpus; + } - for (i = 0; i < pcms->possible_cpus->len; i++) { + ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * max_cpus); + ms->possible_cpus->len = max_cpus; + for (i = 0; i < ms->possible_cpus->len; i++) { X86CPUTopoInfo topo; - HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1); - HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); - CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1); - const uint32_t apic_id = pcms->possible_cpus->cpus[i].arch_id; - - x86_topo_ids_from_apicid(apic_id, smp_cores, smp_threads, &topo); - - cpu_item->type = g_strdup(cpu_type); - cpu_item->vcpus_count = 1; - cpu_props->has_socket_id = true; - cpu_props->socket_id = topo.pkg_id; - cpu_props->has_core_id = true; - cpu_props->core_id = topo.core_id; - cpu_props->has_thread_id = true; - cpu_props->thread_id = topo.smt_id; - cpu_item->props = cpu_props; - - cpu = pcms->possible_cpus->cpus[i].cpu; - if (cpu) { - cpu_item->has_qom_path = true; - cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu)); - } - list_item->value = cpu_item; - list_item->next = head; - head = list_item; + ms->possible_cpus->cpus[i].vcpus_count = 1; + ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i); + x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, + smp_cores, smp_threads, &topo); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; + ms->possible_cpus->cpus[i].props.has_core_id = true; + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; + ms->possible_cpus->cpus[i].props.has_thread_id = true; + ms->possible_cpus->cpus[i].props.thread_id = topo.smt_id; } - return head; + return ms->possible_cpus; } static void x86_nmi(NMIState *n, int cpu_index, Error **errp) @@ -2342,7 +2326,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->get_hotplug_handler = pc_get_hotpug_handler; mc->cpu_index_to_socket_id = pc_cpu_index_to_socket_id; mc->possible_cpu_arch_ids = pc_possible_cpu_arch_ids; - mc->query_hotpluggable_cpus = pc_query_hotpluggable_cpus; + mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->hot_add_cpu = pc_hot_add_cpu; mc->block_default_type = IF_IDE; diff --git a/hw/ide/core.c b/hw/ide/core.c index cfa5de6ebf..db509b3e15 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1120,7 +1120,7 @@ static void ide_cfata_metadata_write(IDEState *s) } /* called when the inserted state of the media has changed */ -static void ide_cd_change_cb(void *opaque, bool load) +static void ide_cd_change_cb(void *opaque, bool load, Error **errp) { IDEState *s = opaque; uint64_t nb_sectors; diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index dbaa75cf59..4383cd111d 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -170,7 +170,7 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } } @@ -196,7 +196,12 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) return -1; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD, + &err); + if (err) { + error_report_err(err); + return -1; + } if (ide_init_drive(s, dev->conf.blk, kind, dev->version, dev->serial, dev->model, dev->wwn, diff --git a/hw/intc/Makefile.objs 
b/hw/intc/Makefile.objs index 8948106ac4..adedd0da5f 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -24,7 +24,7 @@ obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o -obj-$(CONFIG_STELLARIS) += armv7m_nvic.o +obj-$(CONFIG_ARM_V7M) += armv7m_nvic.o obj-$(CONFIG_EXYNOS4) += exynos4210_gic.o exynos4210_combiner.o obj-$(CONFIG_GRLIB) += grlib_irqmp.o obj-$(CONFIG_IOAPIC) += ioapic.o diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 521aac3cc6..8e5a9d8a3e 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -156,17 +156,6 @@ static void gic_set_irq_11mpcore(GICState *s, int irq, int level, } } -static void gic_set_irq_nvic(GICState *s, int irq, int level, - int cm, int target) -{ - if (level) { - GIC_SET_LEVEL(irq, cm); - GIC_SET_PENDING(irq, target); - } else { - GIC_CLEAR_LEVEL(irq, cm); - } -} - static void gic_set_irq_generic(GICState *s, int irq, int level, int cm, int target) { @@ -214,8 +203,6 @@ static void gic_set_irq(void *opaque, int irq, int level) if (s->revision == REV_11MPCORE) { gic_set_irq_11mpcore(s, irq, level, cm, target); - } else if (s->revision == REV_NVIC) { - gic_set_irq_nvic(s, irq, level, cm, target); } else { gic_set_irq_generic(s, irq, level, cm, target); } @@ -367,7 +354,7 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs) return 1023; } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { /* Clear pending flags for both level and edge triggered interrupts. * Level triggered IRQs will be reasserted once they become inactive. */ @@ -589,11 +576,6 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs) DPRINTF("Set %d pending mask %x\n", irq, cm); GIC_SET_PENDING(irq, cm); } - } else if (s->revision == REV_NVIC) { - if (GIC_TEST_LEVEL(irq, cm)) { - DPRINTF("Set nvic %d pending mask %x\n", irq, cm); - GIC_SET_PENDING(irq, cm); - } } group = gic_has_groups(s) && GIC_TEST_GROUP(irq, cm); @@ -768,7 +750,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs) } else if (offset < 0xf10) { goto bad_reg; } else if (offset < 0xf30) { - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } @@ -802,9 +784,6 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset, MemTxAttrs attrs) case 2: res = gic_id_gicv2[(offset - 0xfd0) >> 2]; break; - case REV_NVIC: - /* Shouldn't be able to get here */ - abort(); default: res = 0; } @@ -1028,7 +1007,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, continue; /* Ignore Non-secure access of Group0 IRQ */ } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { if (value & (1 << (i * 2))) { GIC_SET_MODEL(irq + i); } else { @@ -1046,7 +1025,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, goto bad_reg; } else if (offset < 0xf20) { /* GICD_CPENDSGIRn */ - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } irq = (offset - 0xf10); @@ -1060,7 +1039,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, } } else if (offset < 0xf30) { /* GICD_SPENDSGIRn */ - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { goto bad_reg; } irq = (offset - 0xf20); diff --git 
a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c index 4a8df44fb1..70f1134823 100644 --- a/hw/intc/arm_gic_common.c +++ b/hw/intc/arm_gic_common.c @@ -99,9 +99,7 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler, * [N+32..N+63] PPIs for CPU 1 * ... */ - if (s->revision != REV_NVIC) { - i += (GIC_INTERNAL * s->num_cpu); - } + i += (GIC_INTERNAL * s->num_cpu); qdev_init_gpio_in(DEVICE(s), handler, i); for (i = 0; i < s->num_cpu; i++) { @@ -121,16 +119,12 @@ void gic_init_irqs_and_mmio(GICState *s, qemu_irq_handler handler, memory_region_init_io(&s->iomem, OBJECT(s), ops, s, "gic_dist", 0x1000); sysbus_init_mmio(sbd, &s->iomem); - if (s->revision != REV_NVIC) { - /* This is the main CPU interface "for this core". It is always - * present because it is required by both software emulation and KVM. - * NVIC is not handled here because its CPU interface is different, - * neither it can use KVM. - */ - memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL, - s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100); - sysbus_init_mmio(sbd, &s->cpuiomem[0]); - } + /* This is the main CPU interface "for this core". It is always + * present because it is required by both software emulation and KVM. + */ + memory_region_init_io(&s->cpuiomem[0], OBJECT(s), ops ? &ops[1] : NULL, + s, "gic_cpu", s->revision == 2 ? 0x2000 : 0x100); + sysbus_init_mmio(sbd, &s->cpuiomem[0]); } static void arm_gic_common_realize(DeviceState *dev, Error **errp) @@ -162,7 +156,7 @@ static void arm_gic_common_realize(DeviceState *dev, Error **errp) } if (s->security_extn && - (s->revision == REV_11MPCORE || s->revision == REV_NVIC)) { + (s->revision == REV_11MPCORE)) { error_setg(errp, "this GIC revision does not implement " "the security extensions"); return; @@ -255,7 +249,6 @@ static Property arm_gic_common_properties[] = { DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32), /* Revision can be 1 or 2 for GIC architecture specification * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC. - * (Internally, 0xffffffff also indicates "not a GIC but an NVIC".) */ DEFINE_PROP_UINT32("revision", GICState, revision, 1), /* True if the GIC should implement the security extensions */ diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c index 16b9b0f7eb..c6493d6c07 100644 --- a/hw/intc/arm_gicv3_common.c +++ b/hw/intc/arm_gicv3_common.c @@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = { } }; +static int icc_sre_el1_reg_pre_load(void *opaque) +{ + GICv3CPUState *cs = opaque; + + /* + * If the sre_el1 subsection is not transferred this + * means SRE_EL1 is 0x7 (which might not be the same as + * our reset value). 
+ */ + cs->icc_sre_el1 = 0x7; + return 0; +} + +static bool icc_sre_el1_reg_needed(void *opaque) +{ + GICv3CPUState *cs = opaque; + + return cs->icc_sre_el1 != 7; +} + +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = { + .name = "arm_gicv3_cpu/sre_el1", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = icc_sre_el1_reg_pre_load, + .needed = icc_sre_el1_reg_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(icc_sre_el1, GICv3CPUState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_gicv3_cpu = { .name = "arm_gicv3_cpu", .version_id = 1, @@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = { .subsections = (const VMStateDescription * []) { &vmstate_gicv3_cpu_virt, NULL + }, + .subsections = (const VMStateDescription * []) { + &vmstate_gicv3_cpu_sre_el1, + NULL } }; @@ -216,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) s->cpu[i].cpu = cpu; s->cpu[i].gic = s; + /* Store GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); /* Pre-construct the GICR_TYPER: * For our implementation: diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index c25ee03556..0b208560bd 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -14,10 +14,19 @@ #include "qemu/osdep.h" #include "qemu/bitops.h" +#include "qemu/main-loop.h" #include "trace.h" #include "gicv3_internal.h" #include "cpu.h" +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) +{ + ARMCPU *arm_cpu = ARM_CPU(cpu); + CPUARMState *env = &arm_cpu->env; + + env->gicv3state = (void *)s; +}; + static GICv3CPUState *icc_cs_from_env(CPUARMState *env) { /* Given the CPU, find the right GICv3CPUState struct. @@ -733,6 +742,8 @@ void gicv3_cpuif_update(GICv3CPUState *cs) ARMCPU *cpu = ARM_CPU(cs->cpu); CPUARMState *env = &cpu->env; + g_assert(qemu_mutex_iothread_locked()); + trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, cs->hppi.grp, cs->hppi.prio); diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index d69dc47370..81f0403117 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -23,8 +23,10 @@ #include "qapi/error.h" #include "hw/intc/arm_gicv3_common.h" #include "hw/sysbus.h" +#include "qemu/error-report.h" #include "sysemu/kvm.h" #include "kvm_arm.h" +#include "gicv3_internal.h" #include "vgic_common.h" #include "migration/migration.h" @@ -44,6 +46,32 @@ #define KVM_ARM_GICV3_GET_CLASS(obj) \ OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3) +#define KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \ + (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ICC_PMR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0) +#define ICC_BPR0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3) +#define ICC_AP0R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n) +#define ICC_AP1R_EL1(n) \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n) +#define ICC_BPR1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3) +#define ICC_CTLR_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5) +#define ICC_IGRPEN0_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6) +#define ICC_IGRPEN1_EL1 \ + KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7) + typedef struct KVMARMGICv3Class { ARMGICv3CommonClass parent_class; DeviceRealize parent_realize; @@ -57,16 +85,549 @@ 
static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) kvm_arm_gic_set_irq(s->num_irq, irq, level); } +#define KVM_VGIC_ATTR(reg, typer) \ + ((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg)) + +static inline void kvm_gicd_access(GICv3State *s, int offset, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + KVM_VGIC_ATTR(offset, 0), + val, write); +} + +static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu, + uint64_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer), + val, write); +} + +static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu, + uint32_t *val, bool write) +{ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) | + (VGIC_LEVEL_INFO_LINE_LEVEL << + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT), + val, write); +} + +/* Loop through each distributor IRQ related register; since bits + * corresponding to SPIs and PPIs are RAZ/WI when affinity routing + * is enabled, we skip those. + */ +#define for_each_dist_irq_reg(_irq, _max, _field_width) \ + for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width)) + +static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + kvm_gicd_access(s, offset, &reg, false); + *field = reg; + offset += 4; + field++; + } +} + +static void kvm_dist_put_priority(GICv3State *s, uint32_t offset, uint8_t *bmp) +{ + uint32_t reg, *field; + int irq; + + field = (uint32_t *)bmp; + for_each_dist_irq_reg(irq, s->num_irq, 8) { + reg = *field; + kvm_gicd_access(s, offset, &reg, true); + offset += 4; + field++; + } +} + +static void kvm_dist_get_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + kvm_gicd_access(s, offset, &reg, false); + reg = half_unshuffle32(reg >> 1); + if (irq % 32 != 0) { + reg = (reg << 16); + } + *gic_bmp_ptr32(bmp, irq) |= reg; + offset += 4; + } +} + +static void kvm_dist_put_edge_trigger(GICv3State *s, uint32_t offset, + uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 2) { + reg = *gic_bmp_ptr32(bmp, irq); + if (irq % 32 != 0) { + reg = (reg & 0xffff0000) >> 16; + } else { + reg = reg & 0xffff; + } + reg = half_shuffle32(reg) << 1; + kvm_gicd_access(s, offset, &reg, true); + offset += 4; + } +} + +static void kvm_gic_get_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gic_line_level_access(s, irq, 0, &reg, false); + *gic_bmp_ptr32(bmp, irq) = reg; + } +} + +static void kvm_gic_put_line_level_bmp(GICv3State *s, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gic_line_level_access(s, irq, 0, &reg, true); + } +} + +/* Read a bitmap register group from the kernel VGIC. 
*/ +static void kvm_dist_getbmp(GICv3State *s, uint32_t offset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + kvm_gicd_access(s, offset, &reg, false); + *gic_bmp_ptr32(bmp, irq) = reg; + offset += 4; + } +} + +static void kvm_dist_putbmp(GICv3State *s, uint32_t offset, + uint32_t clroffset, uint32_t *bmp) +{ + uint32_t reg; + int irq; + + for_each_dist_irq_reg(irq, s->num_irq, 1) { + /* If this bitmap is a set/clear register pair, first write to the + * clear-reg to clear all bits before using the set-reg to write + * the 1 bits. + */ + if (clroffset != 0) { + reg = 0; + kvm_gicd_access(s, clroffset, &reg, true); + } + reg = *gic_bmp_ptr32(bmp, irq); + kvm_gicd_access(s, offset, &reg, true); + offset += 4; + } +} + +static void kvm_arm_gicv3_check(GICv3State *s) +{ + uint32_t reg; + uint32_t num_irq; + + /* Sanity checking s->num_irq */ + kvm_gicd_access(s, GICD_TYPER, &reg, false); + num_irq = ((reg & 0x1f) + 1) * 32; + + if (num_irq < s->num_irq) { + error_report("Model requests %u IRQs, but kernel supports max %u", + s->num_irq, num_irq); + abort(); + } +} + static void kvm_arm_gicv3_put(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, &regl, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, &regh, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + reg = s->gicd_ctlr; + kvm_gicd_access(s, GICD_CTLR, &reg, true); + + if (redist_typer & GICR_TYPER_PLPIS) { + /* Set base addresses before LPIs are enabled by GICR_CTLR write */ + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg64 = c->gicr_propbaser; + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PROPBASER, ncpu, &regl, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, &regh, true); + + reg64 = c->gicr_pendbaser; + if (!c->gicr_ctlr & GICR_CTLR_ENABLE_LPIS) { + /* Setting PTZ is advised if LPIs are disabled, to reduce + * GIC initialization time. 
+ */ + reg64 |= GICR_PENDBASER_PTZ; + } + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, true); + regh = (uint32_t)(reg64 >> 32); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, true); + } + } + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + reg = c->gicr_ctlr; + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); + + reg = c->gicr_statusr[GICV3_NS]; + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, true); + + reg = c->gicr_waker; + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, true); + + reg = c->gicr_igroupr0; + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICENABLER0, ncpu, ®, true); + reg = c->gicr_ienabler0; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, true); + + /* Restore config before pending so we treat level/edge correctly */ + reg = half_shuffle32(c->edge_trigger >> 16) << 1; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, true); + + reg = c->level; + kvm_gic_line_level_access(s, 0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICPENDR0, ncpu, ®, true); + reg = c->gicr_ipendr0; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, true); + + reg = ~0; + kvm_gicr_access(s, GICR_ICACTIVER0, ncpu, ®, true); + reg = c->gicr_iactiver0; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, true); + + for (i = 0; i < GIC_INTERNAL; i += 4) { + reg = c->gicr_ipriorityr[i] | + (c->gicr_ipriorityr[i + 1] << 8) | + (c->gicr_ipriorityr[i + 2] << 16) | + (c->gicr_ipriorityr[i + 3] << 24); + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, true); + } + } + + /* Distributor state (shared between all CPUs */ + reg = s->gicd_statusr[GICV3_NS]; + kvm_gicd_access(s, GICD_STATUSR, ®, true); + + /* s->enable bitmap -> GICD_ISENABLERn */ + kvm_dist_putbmp(s, GICD_ISENABLER, GICD_ICENABLER, s->enabled); + + /* s->group bitmap -> GICD_IGROUPRn */ + kvm_dist_putbmp(s, GICD_IGROUPR, 0, s->group); + + /* Restore targets before pending to ensure the pending state is set on + * the appropriate CPU interfaces in the kernel + */ + + /* s->gicd_irouter[irq] -> GICD_IROUTERn + * We can't use kvm_dist_put() here because the registers are 64-bit + */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + reg = (uint32_t)s->gicd_irouter[i]; + kvm_gicd_access(s, offset, ®, true); + + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + reg = (uint32_t)(s->gicd_irouter[i] >> 32); + kvm_gicd_access(s, offset, ®, true); + } + + /* s->trigger bitmap -> GICD_ICFGRn + * (restore configuration registers before pending IRQs so we treat + * level/edge correctly) + */ + kvm_dist_put_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* s->level bitmap -> line_level */ + kvm_gic_put_line_level_bmp(s, s->level); + + /* s->pending bitmap -> GICD_ISPENDRn */ + kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending); + + /* s->active bitmap -> GICD_ISACTIVERn */ + kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active); + + /* s->gicd_ipriority[] -> GICD_IPRIORITYRn */ + kvm_dist_put_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* CPU Interface state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], true); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, 
ncpu, + &c->icc_igrpen[GICV3_G1NS], true); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, true); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], true); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], true); + + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G0][3]; + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G0][2]; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G0][1]; + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G0][0]; + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, true); + } + + switch (num_pri_bits) { + case 7: + reg64 = c->icc_apr[GICV3_G1NS][3]; + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, true); + reg64 = c->icc_apr[GICV3_G1NS][2]; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, true); + case 6: + reg64 = c->icc_apr[GICV3_G1NS][1]; + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, true); + default: + reg64 = c->icc_apr[GICV3_G1NS][0]; + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, true); + } + } } static void kvm_arm_gicv3_get(GICv3State *s) { - /* TODO */ - DPRINTF("Cannot get kernel gic state, no kernel interface\n"); + uint32_t regl, regh, reg; + uint64_t reg64, redist_typer; + int ncpu, i; + + kvm_arm_gicv3_check(s); + + kvm_gicr_access(s, GICR_TYPER, 0, ®l, false); + kvm_gicr_access(s, GICR_TYPER + 4, 0, ®h, false); + redist_typer = ((uint64_t)regh << 32) | regl; + + kvm_gicd_access(s, GICD_CTLR, ®, false); + s->gicd_ctlr = reg; + + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); + c->gicr_ctlr = reg; + + kvm_gicr_access(s, GICR_STATUSR, ncpu, ®, false); + c->gicr_statusr[GICV3_NS] = reg; + + kvm_gicr_access(s, GICR_WAKER, ncpu, ®, false); + c->gicr_waker = reg; + + kvm_gicr_access(s, GICR_IGROUPR0, ncpu, ®, false); + c->gicr_igroupr0 = reg; + kvm_gicr_access(s, GICR_ISENABLER0, ncpu, ®, false); + c->gicr_ienabler0 = reg; + kvm_gicr_access(s, GICR_ICFGR1, ncpu, ®, false); + c->edge_trigger = half_unshuffle32(reg >> 1) << 16; + kvm_gic_line_level_access(s, 0, ncpu, ®, false); + c->level = reg; + kvm_gicr_access(s, GICR_ISPENDR0, ncpu, ®, false); + c->gicr_ipendr0 = reg; + kvm_gicr_access(s, GICR_ISACTIVER0, ncpu, ®, false); + c->gicr_iactiver0 = reg; + + for (i = 0; i < GIC_INTERNAL; i += 4) { + kvm_gicr_access(s, GICR_IPRIORITYR + i, ncpu, ®, false); + c->gicr_ipriorityr[i] = extract32(reg, 0, 8); + c->gicr_ipriorityr[i + 1] = extract32(reg, 8, 8); + c->gicr_ipriorityr[i + 2] = extract32(reg, 16, 8); + c->gicr_ipriorityr[i + 3] = extract32(reg, 24, 8); + } + } + + if (redist_typer & GICR_TYPER_PLPIS) { + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PROPBASER + 4, ncpu, ®h, false); + c->gicr_propbaser = ((uint64_t)regh << 32) | regl; + + kvm_gicr_access(s, GICR_PENDBASER, ncpu, ®l, false); + kvm_gicr_access(s, GICR_PENDBASER + 4, ncpu, ®h, false); + c->gicr_pendbaser = ((uint64_t)regh << 32) | regl; + } + } + + /* Distributor state (shared between all CPUs */ + + kvm_gicd_access(s, GICD_STATUSR, ®, false); + s->gicd_statusr[GICV3_NS] = reg; + + /* GICD_IGROUPRn -> s->group bitmap */ + kvm_dist_getbmp(s, GICD_IGROUPR, s->group); 
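As an aside on the access pattern used by kvm_dist_getbmp() and the other for_each_dist_irq_reg() walkers in this file: one 32-bit distributor register covers 32/field_width interrupts, and the first 32 interrupts (SGIs/PPIs) are skipped. The following standalone sketch is an illustration only and is not part of the patch; kvm_access() is a hypothetical stand-in for kvm_gicd_access() and the register bases are made up for the example.

    #include <stdint.h>
    #include <stdio.h>

    #define GIC_INTERNAL 32

    /* Stand-in for kvm_gicd_access(): just record the access. */
    static void kvm_access(uint32_t offset, uint32_t *val, int write)
    {
        (void)val;
        printf("%s 0x%03x\n", write ? "write" : "read", offset);
    }

    /* One 32-bit register holds 32/field_width interrupts:
     *   field_width 1 (enable/pending/active/group): 32 IRQs per register
     *   field_width 2 (config):                      16 IRQs per register
     *   field_width 8 (priority):                     4 IRQs per register
     * IRQs 0..31 are skipped, as in for_each_dist_irq_reg().
     */
    static int walk_group(uint32_t base, int num_irq, int field_width)
    {
        uint32_t reg = 0, offset = base;
        int irq, accesses = 0;

        for (irq = GIC_INTERNAL; irq < num_irq; irq += 32 / field_width) {
            kvm_access(offset, &reg, 0);
            offset += 4;
            accesses++;
        }
        return accesses;
    }

    int main(void)
    {
        /* For a 256-IRQ GIC: (256 - 32) / 32 = 7 accesses for a 1-bit group,
         * (256 - 32) / 4 = 56 accesses for the 8-bit priority group.
         */
        printf("%d\n", walk_group(0x080, 256, 1));
        printf("%d\n", walk_group(0x400, 256, 8));
        return 0;
    }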
+ + /* GICD_ISENABLERn -> s->enabled bitmap */ + kvm_dist_getbmp(s, GICD_ISENABLER, s->enabled); + + /* Line level of irq */ + kvm_gic_get_line_level_bmp(s, s->level); + /* GICD_ISPENDRn -> s->pending bitmap */ + kvm_dist_getbmp(s, GICD_ISPENDR, s->pending); + + /* GICD_ISACTIVERn -> s->active bitmap */ + kvm_dist_getbmp(s, GICD_ISACTIVER, s->active); + + /* GICD_ICFGRn -> s->trigger bitmap */ + kvm_dist_get_edge_trigger(s, GICD_ICFGR, s->edge_trigger); + + /* GICD_IPRIORITYRn -> s->gicd_ipriority[] */ + kvm_dist_get_priority(s, GICD_IPRIORITYR, s->gicd_ipriority); + + /* GICD_IROUTERn -> s->gicd_irouter[irq] */ + for (i = GIC_INTERNAL; i < s->num_irq; i++) { + uint32_t offset; + + offset = GICD_IROUTER + (sizeof(uint32_t) * i); + kvm_gicd_access(s, offset, ®l, false); + offset = GICD_IROUTER + (sizeof(uint32_t) * i) + 4; + kvm_gicd_access(s, offset, ®h, false); + s->gicd_irouter[i] = ((uint64_t)regh << 32) | regl; + } + + /***************************************************************** + * CPU Interface(s) State + */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); + kvm_gicc_access(s, ICC_IGRPEN0_EL1, ncpu, + &c->icc_igrpen[GICV3_G0], false); + kvm_gicc_access(s, ICC_IGRPEN1_EL1, ncpu, + &c->icc_igrpen[GICV3_G1NS], false); + kvm_gicc_access(s, ICC_PMR_EL1, ncpu, &c->icc_pmr_el1, false); + kvm_gicc_access(s, ICC_BPR0_EL1, ncpu, &c->icc_bpr[GICV3_G0], false); + kvm_gicc_access(s, ICC_BPR1_EL1, ncpu, &c->icc_bpr[GICV3_G1NS], false); + num_pri_bits = ((c->icc_ctlr_el1[GICV3_NS] & + ICC_CTLR_EL1_PRIBITS_MASK) >> + ICC_CTLR_EL1_PRIBITS_SHIFT) + 1; + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP0R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G0][3] = reg64; + kvm_gicc_access(s, ICC_AP0R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G0][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP0R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G0][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP0R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G0][0] = reg64; + } + + switch (num_pri_bits) { + case 7: + kvm_gicc_access(s, ICC_AP1R_EL1(3), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][3] = reg64; + kvm_gicc_access(s, ICC_AP1R_EL1(2), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][2] = reg64; + case 6: + kvm_gicc_access(s, ICC_AP1R_EL1(1), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][1] = reg64; + default: + kvm_gicc_access(s, ICC_AP1R_EL1(0), ncpu, ®64, false); + c->icc_apr[GICV3_G1NS][0] = reg64; + } + } +} + +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) +{ + ARMCPU *cpu; + GICv3State *s; + GICv3CPUState *c; + + c = (GICv3CPUState *)env->gicv3state; + s = c->gic; + cpu = ARM_CPU(c->cpu); + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), + &c->icc_ctlr_el1[GICV3_NS], false); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + c->icc_pmr_el1 = 0; + c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1] = GIC_MIN_BPR; + c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR; + + c->icc_sre_el1 = 0x7; + memset(c->icc_apr, 0, sizeof(c->icc_apr)); + memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen)); } static void kvm_arm_gicv3_reset(DeviceState *dev) @@ -77,9 +638,43 @@ static void kvm_arm_gicv3_reset(DeviceState *dev) DPRINTF("Reset\n"); 
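The cascaded (fall-through) switch statements in kvm_arm_gicv3_put() and kvm_arm_gicv3_get() above transfer only the ICC_AP0R/AP1R registers that exist for the implemented number of priority bits, as reported by ICC_CTLR_EL1.PRIBITS. A small standalone sketch of that mapping follows; it is an illustration only, with the two #defines mirroring the values used in gicv3_internal.h.

    #include <stdint.h>
    #include <stdio.h>

    #define ICC_CTLR_EL1_PRIBITS_SHIFT 8
    #define ICC_CTLR_EL1_PRIBITS_MASK  (7U << ICC_CTLR_EL1_PRIBITS_SHIFT)

    /* PRIBITS encodes "number of priority bits minus one".
     * 7 priority bits -> ICC_AP<n>R<0..3> exist, 6 bits -> <0..1>,
     * 5 or fewer -> only <0>.
     */
    static int num_apr_regs(uint64_t icc_ctlr_el1)
    {
        int pri_bits = ((icc_ctlr_el1 & ICC_CTLR_EL1_PRIBITS_MASK) >>
                        ICC_CTLR_EL1_PRIBITS_SHIFT) + 1;

        switch (pri_bits) {
        case 7:
            return 4;
        case 6:
            return 2;
        default:
            return 1;
        }
    }

    int main(void)
    {
        /* PRIBITS field = 4 means 5 priority bits: only AP<n>R<0> is migrated */
        printf("%d\n", num_apr_regs(4ULL << ICC_CTLR_EL1_PRIBITS_SHIFT));
        return 0;
    }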
kgc->parent_reset(dev); + + if (s->migration_blocker) { + DPRINTF("Cannot put kernel gic state, no kernel interface\n"); + return; + } + kvm_arm_gicv3_put(s); } +/* + * CPU interface registers of GIC needs to be reset on CPU reset. + * For the calling arm_gicv3_icc_reset() on CPU reset, we register + * below ARMCPRegInfo. As we reset the whole cpu interface under single + * register reset, we define only one register of CPU interface instead + * of defining all the registers. + */ +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = { + { .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4, + /* + * If ARM_CP_NOP is used, resetfn is not called, + * So ARM_CP_NO_RAW is appropriate type. + */ + .type = ARM_CP_NO_RAW, + .access = PL1_RW, + .readfn = arm_cp_read_zero, + .writefn = arm_cp_write_ignore, + /* + * We hang the whole cpu interface reset routine off here + * rather than parcelling it out into one little function + * per register + */ + .resetfn = arm_gicv3_icc_reset, + }, + REGINFO_SENTINEL +}; + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) { GICv3State *s = KVM_ARM_GICV3(dev); @@ -103,16 +698,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); - /* Block migration of a KVM GICv3 device: the API for saving and restoring - * the state in the kernel is not yet finalised in the kernel or - * implemented in QEMU. - */ - error_setg(&s->migration_blocker, "vGICv3 migration is not implemented"); - migrate_add_blocker(s->migration_blocker, &local_err); - if (local_err) { - error_propagate(errp, local_err); - error_free(s->migration_blocker); - return; + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); + + define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); } /* Try to create the device via the device control API */ @@ -145,6 +734,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) kvm_irqchip_commit_routes(kvm_state); } + + if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_CTLR)) { + error_setg(&s->migration_blocker, "This operating system kernel does " + "not support vGICv3 migration"); + migrate_add_blocker(s->migration_blocker, &local_err); + if (local_err) { + error_propagate(errp, local_err); + error_free(s->migration_blocker); + return; + } + } } static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c index fe5c303de9..32ffa0bf35 100644 --- a/hw/intc/armv7m_nvic.c +++ b/hw/intc/armv7m_nvic.c @@ -17,213 +17,425 @@ #include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/arm/arm.h" -#include "exec/address-spaces.h" -#include "gic_internal.h" +#include "hw/arm/armv7m_nvic.h" +#include "target/arm/cpu.h" #include "qemu/log.h" +#include "trace.h" -typedef struct { - GICState gic; - ARMCPU *cpu; - struct { - uint32_t control; - uint32_t reload; - int64_t tick; - QEMUTimer *timer; - } systick; - MemoryRegion sysregmem; - MemoryRegion gic_iomem_alias; - MemoryRegion container; - uint32_t num_irq; - qemu_irq sysresetreq; -} nvic_state; - -#define TYPE_NVIC "armv7m_nvic" -/** - * NVICClass: - * @parent_reset: the parent class' reset handler. 
+/* IRQ number counting: * - * A model of the v7M NVIC and System Controller + * the num-irq property counts the number of external IRQ lines + * + * NVICState::num_irq counts the total number of exceptions + * (external IRQs, the 15 internal exceptions including reset, + * and one for the unused exception number 0). + * + * NVIC_MAX_IRQ is the highest permitted number of external IRQ lines. + * + * NVIC_MAX_VECTORS is the highest permitted number of exceptions. + * + * Iterating through all exceptions should typically be done with + * for (i = 1; i < s->num_irq; i++) to avoid the unused slot 0. + * + * The external qemu_irq lines are the NVIC's external IRQ lines, + * so line 0 is exception 16. + * + * In the terminology of the architecture manual, "interrupts" are + * a subcategory of exception referring to the external interrupts + * (which are exception numbers NVIC_FIRST_IRQ and upward). + * For historical reasons QEMU tends to use "interrupt" and + * "exception" more or less interchangeably. + */ +#define NVIC_FIRST_IRQ 16 +#define NVIC_MAX_IRQ (NVIC_MAX_VECTORS - NVIC_FIRST_IRQ) + +/* Effective running priority of the CPU when no exception is active + * (higher than the highest possible priority value) */ -typedef struct NVICClass { - /*< private >*/ - ARMGICClass parent_class; - /*< public >*/ - DeviceRealize parent_realize; - void (*parent_reset)(DeviceState *dev); -} NVICClass; - -#define NVIC_CLASS(klass) \ - OBJECT_CLASS_CHECK(NVICClass, (klass), TYPE_NVIC) -#define NVIC_GET_CLASS(obj) \ - OBJECT_GET_CLASS(NVICClass, (obj), TYPE_NVIC) -#define NVIC(obj) \ - OBJECT_CHECK(nvic_state, (obj), TYPE_NVIC) +#define NVIC_NOEXC_PRIO 0x100 static const uint8_t nvic_id[] = { 0x00, 0xb0, 0x1b, 0x00, 0x0d, 0xe0, 0x05, 0xb1 }; -/* qemu timers run at 1GHz. We want something closer to 1MHz. */ -#define SYSTICK_SCALE 1000ULL +static int nvic_pending_prio(NVICState *s) +{ + /* return the priority of the current pending interrupt, + * or NVIC_NOEXC_PRIO if no interrupt is pending + */ + return s->vectpending ? s->vectors[s->vectpending].prio : NVIC_NOEXC_PRIO; +} -#define SYSTICK_ENABLE (1 << 0) -#define SYSTICK_TICKINT (1 << 1) -#define SYSTICK_CLKSOURCE (1 << 2) -#define SYSTICK_COUNTFLAG (1 << 16) +/* Return the value of the ISCR RETTOBASE bit: + * 1 if there is exactly one active exception + * 0 if there is more than one active exception + * UNKNOWN if there are no active exceptions (we choose 1, + * which matches the choice Cortex-M3 is documented as making). + * + * NB: some versions of the documentation talk about this + * counting "active exceptions other than the one shown by IPSR"; + * this is only different in the obscure corner case where guest + * code has manually deactivated an exception and is about + * to fail an exception-return integrity check. The definition + * above is the one from the v8M ARM ARM and is also in line + * with the behaviour documented for the Cortex-M3. + */ +static bool nvic_rettobase(NVICState *s) +{ + int irq, nhand = 0; -int system_clock_scale; + for (irq = ARMV7M_EXCP_RESET; irq < s->num_irq; irq++) { + if (s->vectors[irq].active) { + nhand++; + if (nhand == 2) { + return 0; + } + } + } -/* Conversion factor from qemu timer to SysTick frequencies. 
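The exception-numbering convention spelled out in the comment at the top of the new NVIC code (slot 0 unused, exceptions 1..15 internal, external IRQ line n is exception n + 16) makes the bookkeeping purely additive. A tiny standalone illustration, not part of the patch:

    #include <assert.h>

    #define NVIC_FIRST_IRQ 16

    /* The qdev "num-irq" property counts only external IRQ lines. */
    static int total_exceptions(int num_irq_property)
    {
        /* slot 0 is unused, 1..15 are the fixed internal exceptions */
        return num_irq_property + NVIC_FIRST_IRQ;
    }

    static int exception_for_irq_line(int line)
    {
        return line + NVIC_FIRST_IRQ;
    }

    int main(void)
    {
        /* the default num-irq of 64 gives NVICState::num_irq of 80 vectors */
        assert(total_exceptions(64) == 80);
        /* external IRQ line 0 is exception 16, line 5 is exception 21 */
        assert(exception_for_irq_line(5) == 21);
        return 0;
    }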
*/ -static inline int64_t systick_scale(nvic_state *s) -{ - if (s->systick.control & SYSTICK_CLKSOURCE) - return system_clock_scale; - else - return 1000; + return 1; } -static void systick_reload(nvic_state *s, int reset) +/* Return the value of the ISCR ISRPENDING bit: + * 1 if an external interrupt is pending + * 0 if no external interrupt is pending + */ +static bool nvic_isrpending(NVICState *s) { - /* The Cortex-M3 Devices Generic User Guide says that "When the - * ENABLE bit is set to 1, the counter loads the RELOAD value from the - * SYST RVR register and then counts down". So, we need to check the - * ENABLE bit before reloading the value. + int irq; + + /* We can shortcut if the highest priority pending interrupt + * happens to be external or if there is nothing pending. */ - if ((s->systick.control & SYSTICK_ENABLE) == 0) { - return; + if (s->vectpending > NVIC_FIRST_IRQ) { + return true; + } + if (s->vectpending == 0) { + return false; + } + + for (irq = NVIC_FIRST_IRQ; irq < s->num_irq; irq++) { + if (s->vectors[irq].pending) { + return true; + } } + return false; +} - if (reset) - s->systick.tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->systick.tick += (s->systick.reload + 1) * systick_scale(s); - timer_mod(s->systick.timer, s->systick.tick); +/* Return a mask word which clears the subpriority bits from + * a priority value for an M-profile exception, leaving only + * the group priority. + */ +static inline uint32_t nvic_gprio_mask(NVICState *s) +{ + return ~0U << (s->prigroup + 1); } -static void systick_timer_tick(void * opaque) +/* Recompute vectpending and exception_prio */ +static void nvic_recompute_state(NVICState *s) { - nvic_state *s = (nvic_state *)opaque; - s->systick.control |= SYSTICK_COUNTFLAG; - if (s->systick.control & SYSTICK_TICKINT) { - /* Trigger the interrupt. */ - armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + int i; + int pend_prio = NVIC_NOEXC_PRIO; + int active_prio = NVIC_NOEXC_PRIO; + int pend_irq = 0; + + for (i = 1; i < s->num_irq; i++) { + VecInfo *vec = &s->vectors[i]; + + if (vec->enabled && vec->pending && vec->prio < pend_prio) { + pend_prio = vec->prio; + pend_irq = i; + } + if (vec->active && vec->prio < active_prio) { + active_prio = vec->prio; + } } - if (s->systick.reload == 0) { - s->systick.control &= ~SYSTICK_ENABLE; + + s->vectpending = pend_irq; + s->exception_prio = active_prio & nvic_gprio_mask(s); + + trace_nvic_recompute_state(s->vectpending, s->exception_prio); +} + +/* Return the current execution priority of the CPU + * (equivalent to the pseudocode ExecutionPriority function). + * This is a value between -2 (NMI priority) and NVIC_NOEXC_PRIO. 
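nvic_gprio_mask() above encodes the v7M rule that PRIGROUP splits an 8-bit priority into a group priority (the bits that matter for preemption) and an ignored subpriority. A worked example of the masking; this is a standalone sketch and the priority values are made up:

    #include <stdint.h>
    #include <stdio.h>

    /* With prigroup = 2, group priority is bits [7:3]: mask = ~0u << 3 = 0xf8 */
    static uint32_t gprio_mask(unsigned prigroup)
    {
        return ~0U << (prigroup + 1);
    }

    int main(void)
    {
        unsigned prigroup = 2;

        /* raw priority 0x45 -> group priority 0x40, subpriority 0x05 */
        printf("group 0x%02x\n", (unsigned)(0x45 & gprio_mask(prigroup) & 0xff));

        /* An active handler at group priority 0x40 therefore blocks a pending
         * exception whose raw priority is 0x47 (same group), but not one at
         * 0x3f, whose group priority 0x38 is numerically lower.
         */
        return 0;
    }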
+ */ +static inline int nvic_exec_prio(NVICState *s) +{ + CPUARMState *env = &s->cpu->env; + int running; + + if (env->daif & PSTATE_F) { /* FAULTMASK */ + running = -1; + } else if (env->daif & PSTATE_I) { /* PRIMASK */ + running = 0; + } else if (env->v7m.basepri > 0) { + running = env->v7m.basepri & nvic_gprio_mask(s); } else { - systick_reload(s, 0); + running = NVIC_NOEXC_PRIO; /* lower than any possible priority */ } + /* consider priority of active handler */ + return MIN(running, s->exception_prio); } -static void systick_reset(nvic_state *s) +bool armv7m_nvic_can_take_pending_exception(void *opaque) { - s->systick.control = 0; - s->systick.reload = 0; - s->systick.tick = 0; - timer_del(s->systick.timer); + NVICState *s = opaque; + + return nvic_exec_prio(s) > nvic_pending_prio(s); +} + +/* caller must call nvic_irq_update() after this */ +static void set_prio(NVICState *s, unsigned irq, uint8_t prio) +{ + assert(irq > ARMV7M_EXCP_NMI); /* only use for configurable prios */ + assert(irq < s->num_irq); + + s->vectors[irq].prio = prio; + + trace_nvic_set_prio(irq, prio); +} + +/* Recompute state and assert irq line accordingly. + * Must be called after changes to: + * vec->active, vec->enabled, vec->pending or vec->prio for any vector + * prigroup + */ +static void nvic_irq_update(NVICState *s) +{ + int lvl; + int pend_prio; + + nvic_recompute_state(s); + pend_prio = nvic_pending_prio(s); + + /* Raise NVIC output if this IRQ would be taken, except that we + * ignore the effects of the BASEPRI, FAULTMASK and PRIMASK (which + * will be checked for in arm_v7m_cpu_exec_interrupt()); changes + * to those CPU registers don't cause us to recalculate the NVIC + * pending info. + */ + lvl = (pend_prio < s->exception_prio); + trace_nvic_irq_update(s->vectpending, pend_prio, s->exception_prio, lvl); + qemu_set_irq(s->excpout, lvl); +} + +static void armv7m_nvic_clear_pending(void *opaque, int irq) +{ + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + trace_nvic_clear_pending(irq, vec->enabled, vec->prio); + if (vec->pending) { + vec->pending = 0; + nvic_irq_update(s); + } } -/* The external routines use the hardware vector numbering, ie. the first - IRQ is #16. The internal GIC routines use #32 as the first IRQ. */ void armv7m_nvic_set_pending(void *opaque, int irq) { - nvic_state *s = (nvic_state *)opaque; - if (irq >= 16) - irq += 16; - gic_set_pending_private(&s->gic, 0, irq); + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + trace_nvic_set_pending(irq, vec->enabled, vec->prio); + + + if (irq >= ARMV7M_EXCP_HARD && irq < ARMV7M_EXCP_PENDSV) { + /* If a synchronous exception is pending then it may be + * escalated to HardFault if: + * * it is equal or lower priority to current execution + * * it is disabled + * (ie we need to take it immediately but we can't do so). + * Asynchronous exceptions (and interrupts) simply remain pending. + * + * For QEMU, we don't have any imprecise (asynchronous) faults, + * so we can assume that PREFETCH_ABORT and DATA_ABORT are always + * synchronous. + * Debug exceptions are awkward because only Debug exceptions + * resulting from the BKPT instruction should be escalated, + * but we don't currently implement any Debug exceptions other + * than those that result from BKPT, so we treat all debug exceptions + * as needing escalation. 
+ * + * This all means we can identify whether to escalate based only on + * the exception number and don't (yet) need the caller to explicitly + * tell us whether this exception is synchronous or not. + */ + int running = nvic_exec_prio(s); + bool escalate = false; + + if (vec->prio >= running) { + trace_nvic_escalate_prio(irq, vec->prio, running); + escalate = true; + } else if (!vec->enabled) { + trace_nvic_escalate_disabled(irq); + escalate = true; + } + + if (escalate) { + if (running < 0) { + /* We want to escalate to HardFault but we can't take a + * synchronous HardFault at this point either. This is a + * Lockup condition due to a guest bug. We don't model + * Lockup, so report via cpu_abort() instead. + */ + cpu_abort(&s->cpu->parent_obj, + "Lockup: can't escalate %d to HardFault " + "(current priority %d)\n", irq, running); + } + + /* We can do the escalation, so we take HardFault instead */ + irq = ARMV7M_EXCP_HARD; + vec = &s->vectors[irq]; + s->cpu->env.v7m.hfsr |= R_V7M_HFSR_FORCED_MASK; + } + } + + if (!vec->pending) { + vec->pending = 1; + nvic_irq_update(s); + } } /* Make pending IRQ active. */ -int armv7m_nvic_acknowledge_irq(void *opaque) +void armv7m_nvic_acknowledge_irq(void *opaque) { - nvic_state *s = (nvic_state *)opaque; - uint32_t irq; - - irq = gic_acknowledge_irq(&s->gic, 0, MEMTXATTRS_UNSPECIFIED); - if (irq == 1023) - hw_error("Interrupt but no vector\n"); - if (irq >= 32) - irq -= 16; - return irq; + NVICState *s = (NVICState *)opaque; + CPUARMState *env = &s->cpu->env; + const int pending = s->vectpending; + const int running = nvic_exec_prio(s); + int pendgroupprio; + VecInfo *vec; + + assert(pending > ARMV7M_EXCP_RESET && pending < s->num_irq); + + vec = &s->vectors[pending]; + + assert(vec->enabled); + assert(vec->pending); + + pendgroupprio = vec->prio & nvic_gprio_mask(s); + assert(pendgroupprio < running); + + trace_nvic_acknowledge_irq(pending, vec->prio); + + vec->active = 1; + vec->pending = 0; + + env->v7m.exception = s->vectpending; + + nvic_irq_update(s); } -void armv7m_nvic_complete_irq(void *opaque, int irq) +int armv7m_nvic_complete_irq(void *opaque, int irq) { - nvic_state *s = (nvic_state *)opaque; - if (irq >= 16) - irq += 16; - gic_complete_irq(&s->gic, 0, irq, MEMTXATTRS_UNSPECIFIED); + NVICState *s = (NVICState *)opaque; + VecInfo *vec; + int ret; + + assert(irq > ARMV7M_EXCP_RESET && irq < s->num_irq); + + vec = &s->vectors[irq]; + + trace_nvic_complete_irq(irq); + + if (!vec->active) { + /* Tell the caller this was an illegal exception return */ + return -1; + } + + ret = nvic_rettobase(s); + + vec->active = 0; + if (vec->level) { + /* Re-pend the exception if it's still held high; only + * happens for extenal IRQs + */ + assert(irq >= NVIC_FIRST_IRQ); + vec->pending = 1; + } + + nvic_irq_update(s); + + return ret; +} + +/* callback when external interrupt line is changed */ +static void set_irq_level(void *opaque, int n, int level) +{ + NVICState *s = opaque; + VecInfo *vec; + + n += NVIC_FIRST_IRQ; + + assert(n >= NVIC_FIRST_IRQ && n < s->num_irq); + + trace_nvic_set_irq_level(n, level); + + /* The pending status of an external interrupt is + * latched on rising edge and exception handler return. + * + * Pulsing the IRQ will always run the handler + * once, and the handler will re-run until the + * level is low when the handler completes. 
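The latching behaviour described in the comment above, implemented by set_irq_level() together with armv7m_nvic_complete_irq(), can be summarised with a toy model; this is a standalone sketch, not QEMU code:

    #include <stdbool.h>
    #include <stdio.h>

    /* Pending is latched on a rising edge, and again at handler completion
     * if the external line is still high.
     */
    struct vec { bool level, pending, active; };

    static void set_level(struct vec *v, bool level)
    {
        if (level && !v->level) {
            v->pending = true;      /* rising edge latches the exception */
        }
        v->level = level;
    }

    static void complete(struct vec *v)
    {
        v->active = false;
        if (v->level) {
            v->pending = true;      /* still asserted: run the handler again */
        }
    }

    int main(void)
    {
        struct vec v = { 0 };

        set_level(&v, true);                             /* pulse high... */
        set_level(&v, false);                            /* ...and low again */
        printf("pending after pulse: %d\n", v.pending);  /* 1: runs exactly once */

        v.pending = false;
        v.active = true;                                 /* handler taken */
        complete(&v);
        printf("re-pended: %d\n", v.pending);            /* 0: line went low */
        return 0;
    }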
+ */ + vec = &s->vectors[n]; + if (level != vec->level) { + vec->level = level; + if (level) { + armv7m_nvic_set_pending(s, n); + } + } } -static uint32_t nvic_readl(nvic_state *s, uint32_t offset) +static uint32_t nvic_readl(NVICState *s, uint32_t offset) { ARMCPU *cpu = s->cpu; uint32_t val; - int irq; switch (offset) { case 4: /* Interrupt Control Type. */ - return (s->num_irq / 32) - 1; - case 0x10: /* SysTick Control and Status. */ - val = s->systick.control; - s->systick.control &= ~SYSTICK_COUNTFLAG; - return val; - case 0x14: /* SysTick Reload Value. */ - return s->systick.reload; - case 0x18: /* SysTick Current Value. */ - { - int64_t t; - if ((s->systick.control & SYSTICK_ENABLE) == 0) - return 0; - t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (t >= s->systick.tick) - return 0; - val = ((s->systick.tick - (t + 1)) / systick_scale(s)) + 1; - /* The interrupt in triggered when the timer reaches zero. - However the counter is not reloaded until the next clock - tick. This is a hack to return zero during the first tick. */ - if (val > s->systick.reload) - val = 0; - return val; - } - case 0x1c: /* SysTick Calibration Value. */ - return 10000; + return ((s->num_irq - NVIC_FIRST_IRQ) / 32) - 1; case 0xd00: /* CPUID Base. */ return cpu->midr; case 0xd04: /* Interrupt Control State. */ /* VECTACTIVE */ val = cpu->env.v7m.exception; - if (val == 1023) { - val = 0; - } else if (val >= 32) { - val -= 16; - } /* VECTPENDING */ - if (s->gic.current_pending[0] != 1023) - val |= (s->gic.current_pending[0] << 12); - /* ISRPENDING and RETTOBASE */ - for (irq = 32; irq < s->num_irq; irq++) { - if (s->gic.irq_state[irq].pending) { - val |= (1 << 22); - break; - } - if (irq != cpu->env.v7m.exception && s->gic.irq_state[irq].active) { - val |= (1 << 11); - } + val |= (s->vectpending & 0xff) << 12; + /* ISRPENDING - set if any external IRQ is pending */ + if (nvic_isrpending(s)) { + val |= (1 << 22); + } + /* RETTOBASE - set if only one handler is active */ + if (nvic_rettobase(s)) { + val |= (1 << 11); } /* PENDSTSET */ - if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending) + if (s->vectors[ARMV7M_EXCP_SYSTICK].pending) { val |= (1 << 26); + } /* PENDSVSET */ - if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending) + if (s->vectors[ARMV7M_EXCP_PENDSV].pending) { val |= (1 << 28); + } /* NMIPENDSET */ - if (s->gic.irq_state[ARMV7M_EXCP_NMI].pending) + if (s->vectors[ARMV7M_EXCP_NMI].pending) { val |= (1 << 31); + } + /* ISRPREEMPT not implemented */ return val; case 0xd08: /* Vector Table Offset. */ return cpu->env.v7m.vecbase; case 0xd0c: /* Application Interrupt/Reset Control. */ - return 0xfa050000; + return 0xfa050000 | (s->prigroup << 8); case 0xd10: /* System Control. */ /* TODO: Implement SLEEPONEXIT. */ return 0; @@ -231,20 +443,48 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset) return cpu->env.v7m.ccr; case 0xd24: /* System Handler Status. 
*/ val = 0; - if (s->gic.irq_state[ARMV7M_EXCP_MEM].active) val |= (1 << 0); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].active) val |= (1 << 1); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].active) val |= (1 << 3); - if (s->gic.irq_state[ARMV7M_EXCP_SVC].active) val |= (1 << 7); - if (s->gic.irq_state[ARMV7M_EXCP_DEBUG].active) val |= (1 << 8); - if (s->gic.irq_state[ARMV7M_EXCP_PENDSV].active) val |= (1 << 10); - if (s->gic.irq_state[ARMV7M_EXCP_SYSTICK].active) val |= (1 << 11); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].pending) val |= (1 << 12); - if (s->gic.irq_state[ARMV7M_EXCP_MEM].pending) val |= (1 << 13); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].pending) val |= (1 << 14); - if (s->gic.irq_state[ARMV7M_EXCP_SVC].pending) val |= (1 << 15); - if (s->gic.irq_state[ARMV7M_EXCP_MEM].enabled) val |= (1 << 16); - if (s->gic.irq_state[ARMV7M_EXCP_BUS].enabled) val |= (1 << 17); - if (s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled) val |= (1 << 18); + if (s->vectors[ARMV7M_EXCP_MEM].active) { + val |= (1 << 0); + } + if (s->vectors[ARMV7M_EXCP_BUS].active) { + val |= (1 << 1); + } + if (s->vectors[ARMV7M_EXCP_USAGE].active) { + val |= (1 << 3); + } + if (s->vectors[ARMV7M_EXCP_SVC].active) { + val |= (1 << 7); + } + if (s->vectors[ARMV7M_EXCP_DEBUG].active) { + val |= (1 << 8); + } + if (s->vectors[ARMV7M_EXCP_PENDSV].active) { + val |= (1 << 10); + } + if (s->vectors[ARMV7M_EXCP_SYSTICK].active) { + val |= (1 << 11); + } + if (s->vectors[ARMV7M_EXCP_USAGE].pending) { + val |= (1 << 12); + } + if (s->vectors[ARMV7M_EXCP_MEM].pending) { + val |= (1 << 13); + } + if (s->vectors[ARMV7M_EXCP_BUS].pending) { + val |= (1 << 14); + } + if (s->vectors[ARMV7M_EXCP_SVC].pending) { + val |= (1 << 15); + } + if (s->vectors[ARMV7M_EXCP_MEM].enabled) { + val |= (1 << 16); + } + if (s->vectors[ARMV7M_EXCP_BUS].enabled) { + val |= (1 << 17); + } + if (s->vectors[ARMV7M_EXCP_USAGE].enabled) { + val |= (1 << 18); + } return val; case 0xd28: /* Configurable Fault Status. */ return cpu->env.v7m.cfsr; @@ -294,43 +534,11 @@ static uint32_t nvic_readl(nvic_state *s, uint32_t offset) } } -static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) +static void nvic_writel(NVICState *s, uint32_t offset, uint32_t value) { ARMCPU *cpu = s->cpu; - uint32_t oldval; + switch (offset) { - case 0x10: /* SysTick Control and Status. */ - oldval = s->systick.control; - s->systick.control &= 0xfffffff8; - s->systick.control |= value & 7; - if ((oldval ^ value) & SYSTICK_ENABLE) { - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (value & SYSTICK_ENABLE) { - if (s->systick.tick) { - s->systick.tick += now; - timer_mod(s->systick.timer, s->systick.tick); - } else { - systick_reload(s, 1); - } - } else { - timer_del(s->systick.timer); - s->systick.tick -= now; - if (s->systick.tick < 0) - s->systick.tick = 0; - } - } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { - /* This is a hack. Force the timer to be reloaded - when the reference clock is changed. */ - systick_reload(s, 1); - } - break; - case 0x14: /* SysTick Reload Value. */ - s->systick.reload = value; - break; - case 0x18: /* SysTick Current Value. Writes reload the timer. */ - systick_reload(s, 1); - s->systick.control &= ~SYSTICK_COUNTFLAG; - break; case 0xd04: /* Interrupt Control State. 
*/ if (value & (1 << 31)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_NMI); @@ -338,14 +546,12 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) if (value & (1 << 28)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_PENDSV); } else if (value & (1 << 27)) { - s->gic.irq_state[ARMV7M_EXCP_PENDSV].pending = 0; - gic_update(&s->gic); + armv7m_nvic_clear_pending(s, ARMV7M_EXCP_PENDSV); } if (value & (1 << 26)) { armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); } else if (value & (1 << 25)) { - s->gic.irq_state[ARMV7M_EXCP_SYSTICK].pending = 0; - gic_update(&s->gic); + armv7m_nvic_clear_pending(s, ARMV7M_EXCP_SYSTICK); } break; case 0xd08: /* Vector Table Offset. */ @@ -357,14 +563,17 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) qemu_irq_pulse(s->sysresetreq); } if (value & 2) { - qemu_log_mask(LOG_UNIMP, "VECTCLRACTIVE unimplemented\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "Setting VECTCLRACTIVE when not in DEBUG mode " + "is UNPREDICTABLE\n"); } if (value & 1) { - qemu_log_mask(LOG_UNIMP, "AIRCR system reset unimplemented\n"); - } - if (value & 0x700) { - qemu_log_mask(LOG_UNIMP, "PRIGROUP unimplemented\n"); + qemu_log_mask(LOG_GUEST_ERROR, + "Setting VECTRESET when not in DEBUG mode " + "is UNPREDICTABLE\n"); } + s->prigroup = extract32(value, 8, 3); + nvic_irq_update(s); } break; case 0xd10: /* System Control. */ @@ -383,11 +592,21 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) cpu->env.v7m.ccr = value; break; case 0xd24: /* System Handler Control. */ - /* TODO: Real hardware allows you to set/clear the active bits - under some circumstances. We don't implement this. */ - s->gic.irq_state[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0; - s->gic.irq_state[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0; - s->gic.irq_state[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0; + s->vectors[ARMV7M_EXCP_MEM].active = (value & (1 << 0)) != 0; + s->vectors[ARMV7M_EXCP_BUS].active = (value & (1 << 1)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].active = (value & (1 << 3)) != 0; + s->vectors[ARMV7M_EXCP_SVC].active = (value & (1 << 7)) != 0; + s->vectors[ARMV7M_EXCP_DEBUG].active = (value & (1 << 8)) != 0; + s->vectors[ARMV7M_EXCP_PENDSV].active = (value & (1 << 10)) != 0; + s->vectors[ARMV7M_EXCP_SYSTICK].active = (value & (1 << 11)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].pending = (value & (1 << 12)) != 0; + s->vectors[ARMV7M_EXCP_MEM].pending = (value & (1 << 13)) != 0; + s->vectors[ARMV7M_EXCP_BUS].pending = (value & (1 << 14)) != 0; + s->vectors[ARMV7M_EXCP_SVC].pending = (value & (1 << 15)) != 0; + s->vectors[ARMV7M_EXCP_MEM].enabled = (value & (1 << 16)) != 0; + s->vectors[ARMV7M_EXCP_BUS].enabled = (value & (1 << 17)) != 0; + s->vectors[ARMV7M_EXCP_USAGE].enabled = (value & (1 << 18)) != 0; + nvic_irq_update(s); break; case 0xd28: /* Configurable Fault Status. 
*/ cpu->env.v7m.cfsr &= ~value; /* W1C */ @@ -409,13 +628,16 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) "NVIC: Aux fault status registers unimplemented\n"); break; case 0xf00: /* Software Triggered Interrupt Register */ + { /* user mode can only write to STIR if CCR.USERSETMPEND permits it */ - if ((value & 0x1ff) < s->num_irq && + int excnum = (value & 0x1ff) + NVIC_FIRST_IRQ; + if (excnum < s->num_irq && (arm_current_el(&cpu->env) || (cpu->env.v7m.ccr & R_V7M_CCR_USERSETMPEND_MASK))) { - gic_set_pending_private(&s->gic, 0, value & 0x1ff); + armv7m_nvic_set_pending(s, excnum); } break; + } default: qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad write offset 0x%x\n", offset); @@ -425,46 +647,142 @@ static void nvic_writel(nvic_state *s, uint32_t offset, uint32_t value) static uint64_t nvic_sysreg_read(void *opaque, hwaddr addr, unsigned size) { - nvic_state *s = (nvic_state *)opaque; + NVICState *s = (NVICState *)opaque; uint32_t offset = addr; - int i; + unsigned i, startvec, end; uint32_t val; switch (offset) { + /* reads of set and clear both return the status */ + case 0x100 ... 0x13f: /* NVIC Set enable */ + offset += 0x80; + /* fall through */ + case 0x180 ... 0x1bf: /* NVIC Clear enable */ + val = 0; + startvec = offset - 0x180 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].enabled) { + val |= (1 << i); + } + } + break; + case 0x200 ... 0x23f: /* NVIC Set pend */ + offset += 0x80; + /* fall through */ + case 0x280 ... 0x2bf: /* NVIC Clear pend */ + val = 0; + startvec = offset - 0x280 + NVIC_FIRST_IRQ; /* vector # */ + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].pending) { + val |= (1 << i); + } + } + break; + case 0x300 ... 0x33f: /* NVIC Active */ + val = 0; + startvec = offset - 0x300 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (s->vectors[startvec + i].active) { + val |= (1 << i); + } + } + break; + case 0x400 ... 0x5ef: /* NVIC Priority */ + val = 0; + startvec = offset - 0x400 + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0; i < size && startvec + i < s->num_irq; i++) { + val |= s->vectors[startvec + i].prio << (8 * i); + } + break; case 0xd18 ... 0xd23: /* System Handler Priority. */ val = 0; for (i = 0; i < size; i++) { - val |= s->gic.priority1[(offset - 0xd14) + i][0] << (i * 8); + val |= s->vectors[(offset - 0xd14) + i].prio << (i * 8); } - return val; + break; case 0xfe0 ... 0xfff: /* ID. */ if (offset & 3) { - return 0; + val = 0; + } else { + val = nvic_id[(offset - 0xfe0) >> 2]; + } + break; + default: + if (size == 4) { + val = nvic_readl(s, offset); + } else { + qemu_log_mask(LOG_GUEST_ERROR, + "NVIC: Bad read of size %d at offset 0x%x\n", + size, offset); + val = 0; } - return nvic_id[(offset - 0xfe0) >> 2]; - } - if (size == 4) { - return nvic_readl(s, offset); } - qemu_log_mask(LOG_GUEST_ERROR, - "NVIC: Bad read of size %d at offset 0x%x\n", size, offset); - return 0; + + trace_nvic_sysreg_read(addr, val, size); + return val; } static void nvic_sysreg_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - nvic_state *s = (nvic_state *)opaque; + NVICState *s = (NVICState *)opaque; uint32_t offset = addr; - int i; + unsigned i, startvec, end; + unsigned setval = 0; + + trace_nvic_sysreg_write(addr, value, size); switch (offset) { + case 0x100 ... 
0x13f: /* NVIC Set enable */ + offset += 0x80; + setval = 1; + /* fall through */ + case 0x180 ... 0x1bf: /* NVIC Clear enable */ + startvec = 8 * (offset - 0x180) + NVIC_FIRST_IRQ; + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (value & (1 << i)) { + s->vectors[startvec + i].enabled = setval; + } + } + nvic_irq_update(s); + return; + case 0x200 ... 0x23f: /* NVIC Set pend */ + /* the special logic in armv7m_nvic_set_pending() + * is not needed since IRQs are never escalated + */ + offset += 0x80; + setval = 1; + /* fall through */ + case 0x280 ... 0x2bf: /* NVIC Clear pend */ + startvec = 8 * (offset - 0x280) + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0, end = size * 8; i < end && startvec + i < s->num_irq; i++) { + if (value & (1 << i)) { + s->vectors[startvec + i].pending = setval; + } + } + nvic_irq_update(s); + return; + case 0x300 ... 0x33f: /* NVIC Active */ + return; /* R/O */ + case 0x400 ... 0x5ef: /* NVIC Priority */ + startvec = 8 * (offset - 0x400) + NVIC_FIRST_IRQ; /* vector # */ + + for (i = 0; i < size && startvec + i < s->num_irq; i++) { + set_prio(s, startvec + i, (value >> (i * 8)) & 0xff); + } + nvic_irq_update(s); + return; case 0xd18 ... 0xd23: /* System Handler Priority. */ for (i = 0; i < size; i++) { - s->gic.priority1[(offset - 0xd14) + i][0] = - (value >> (i * 8)) & 0xff; + unsigned hdlidx = (offset - 0xd14) + i; + set_prio(s, hdlidx, (value >> (i * 8)) & 0xff); } - gic_update(&s->gic); + nvic_irq_update(s); return; } if (size == 4) { @@ -481,61 +799,143 @@ static const MemoryRegionOps nvic_sysreg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static const VMStateDescription vmstate_nvic = { - .name = "armv7m_nvic", +static int nvic_post_load(void *opaque, int version_id) +{ + NVICState *s = opaque; + unsigned i; + + /* Check for out of range priority settings */ + if (s->vectors[ARMV7M_EXCP_RESET].prio != -3 || + s->vectors[ARMV7M_EXCP_NMI].prio != -2 || + s->vectors[ARMV7M_EXCP_HARD].prio != -1) { + return 1; + } + for (i = ARMV7M_EXCP_MEM; i < s->num_irq; i++) { + if (s->vectors[i].prio & ~0xff) { + return 1; + } + } + + nvic_recompute_state(s); + + return 0; +} + +static const VMStateDescription vmstate_VecInfo = { + .name = "armv7m_nvic_info", .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { - VMSTATE_UINT32(systick.control, nvic_state), - VMSTATE_UINT32(systick.reload, nvic_state), - VMSTATE_INT64(systick.tick, nvic_state), - VMSTATE_TIMER_PTR(systick.timer, nvic_state), + VMSTATE_INT16(prio, VecInfo), + VMSTATE_UINT8(enabled, VecInfo), + VMSTATE_UINT8(pending, VecInfo), + VMSTATE_UINT8(active, VecInfo), + VMSTATE_UINT8(level, VecInfo), VMSTATE_END_OF_LIST() } }; +static const VMStateDescription vmstate_nvic = { + .name = "armv7m_nvic", + .version_id = 4, + .minimum_version_id = 4, + .post_load = &nvic_post_load, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_ARRAY(vectors, NVICState, NVIC_MAX_VECTORS, 1, + vmstate_VecInfo, VecInfo), + VMSTATE_UINT32(prigroup, NVICState), + VMSTATE_END_OF_LIST() + } +}; + +static Property props_nvic[] = { + /* Number of external IRQ lines (so excluding the 16 internal exceptions) */ + DEFINE_PROP_UINT32("num-irq", NVICState, num_irq, 64), + DEFINE_PROP_END_OF_LIST() +}; + static void armv7m_nvic_reset(DeviceState *dev) { - nvic_state *s = NVIC(dev); - NVICClass *nc = NVIC_GET_CLASS(s); - nc->parent_reset(dev); - /* Common GIC reset resets to disabled; the NVIC doesn't have - * per-CPU interfaces so mark our non-existent CPU interface - * as enabled by default, 
and with a priority mask which allows - * all interrupts through. + NVICState *s = NVIC(dev); + + s->vectors[ARMV7M_EXCP_NMI].enabled = 1; + s->vectors[ARMV7M_EXCP_HARD].enabled = 1; + /* MEM, BUS, and USAGE are enabled through + * the System Handler Control register + */ + s->vectors[ARMV7M_EXCP_SVC].enabled = 1; + s->vectors[ARMV7M_EXCP_DEBUG].enabled = 1; + s->vectors[ARMV7M_EXCP_PENDSV].enabled = 1; + s->vectors[ARMV7M_EXCP_SYSTICK].enabled = 1; + + s->vectors[ARMV7M_EXCP_RESET].prio = -3; + s->vectors[ARMV7M_EXCP_NMI].prio = -2; + s->vectors[ARMV7M_EXCP_HARD].prio = -1; + + /* Strictly speaking the reset handler should be enabled. + * However, we don't simulate soft resets through the NVIC, + * and the reset vector should never be pended. + * So we leave it disabled to catch logic errors. */ - s->gic.cpu_ctlr[0] = GICC_CTLR_EN_GRP0; - s->gic.priority_mask[0] = 0x100; - /* The NVIC as a whole is always enabled. */ - s->gic.ctlr = 1; - systick_reset(s); + + s->exception_prio = NVIC_NOEXC_PRIO; + s->vectpending = 0; +} + +static void nvic_systick_trigger(void *opaque, int n, int level) +{ + NVICState *s = opaque; + + if (level) { + /* SysTick just asked us to pend its exception. + * (This is different from an external interrupt line's + * behaviour.) + */ + armv7m_nvic_set_pending(s, ARMV7M_EXCP_SYSTICK); + } } static void armv7m_nvic_realize(DeviceState *dev, Error **errp) { - nvic_state *s = NVIC(dev); - NVICClass *nc = NVIC_GET_CLASS(s); - Error *local_err = NULL; + NVICState *s = NVIC(dev); + SysBusDevice *systick_sbd; + Error *err = NULL; s->cpu = ARM_CPU(qemu_get_cpu(0)); assert(s->cpu); - /* The NVIC always has only one CPU */ - s->gic.num_cpu = 1; - /* Tell the common code we're an NVIC */ - s->gic.revision = 0xffffffff; - s->num_irq = s->gic.num_irq; - nc->parent_realize(dev, &local_err); - if (local_err) { - error_propagate(errp, local_err); + + if (s->num_irq > NVIC_MAX_IRQ) { + error_setg(errp, "num-irq %d exceeds NVIC maximum", s->num_irq); + return; + } + + qdev_init_gpio_in(dev, set_irq_level, s->num_irq); + + /* include space for internal exception vectors */ + s->num_irq += NVIC_FIRST_IRQ; + + object_property_set_bool(OBJECT(&s->systick), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); return; } - gic_init_irqs_and_distributor(&s->gic); - /* The NVIC and system controller register area looks like this: - * 0..0xff : system control registers, including systick - * 0x100..0xcff : GIC-like registers - * 0xd00..0xfff : system control registers - * We use overlaying to put the GIC like registers - * over the top of the system control register region. + systick_sbd = SYS_BUS_DEVICE(&s->systick); + sysbus_connect_irq(systick_sbd, 0, + qdev_get_gpio_in_named(dev, "systick-trigger", 0)); + + /* The NVIC and System Control Space (SCS) starts at 0xe000e000 + * and looks like this: + * 0x004 - ICTR + * 0x010 - 0xff - systick + * 0x100..0x7ec - NVIC + * 0x7f0..0xcff - Reserved + * 0xd00..0xd3c - SCS registers + * 0xd40..0xeff - Reserved or Not implemented + * 0xf00 - STIR + * + * At the moment there is only one thing in the container region, + * but we leave it in place to allow us to pull systick out into + * its own device object later. 
*/ memory_region_init(&s->container, OBJECT(s), "nvic", 0x1000); /* The system register region goes at the bottom of the priority @@ -544,19 +944,11 @@ static void armv7m_nvic_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->sysregmem, OBJECT(s), &nvic_sysreg_ops, s, "nvic_sysregs", 0x1000); memory_region_add_subregion(&s->container, 0, &s->sysregmem); - /* Alias the GIC region so we can get only the section of it - * we need, and layer it on top of the system register region. - */ - memory_region_init_alias(&s->gic_iomem_alias, OBJECT(s), - "nvic-gic", &s->gic.iomem, - 0x100, 0xc00); - memory_region_add_subregion_overlap(&s->container, 0x100, - &s->gic_iomem_alias, 1); - /* Map the whole thing into system memory at the location required - * by the v7M architecture. - */ - memory_region_add_subregion(get_system_memory(), 0xe000e000, &s->container); - s->systick.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); + memory_region_add_subregion_overlap(&s->container, 0x10, + sysbus_mmio_get_region(systick_sbd, 0), + 1); + + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container); } static void armv7m_nvic_instance_init(Object *obj) @@ -567,36 +959,35 @@ static void armv7m_nvic_instance_init(Object *obj) * any user-specified property setting, so just modify the * value in the GICState struct. */ - GICState *s = ARM_GIC_COMMON(obj); DeviceState *dev = DEVICE(obj); - nvic_state *nvic = NVIC(obj); - /* The ARM v7m may have anything from 0 to 496 external interrupt - * IRQ lines. We default to 64. Other boards may differ and should - * set the num-irq property appropriately. - */ - s->num_irq = 64; + NVICState *nvic = NVIC(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + object_initialize(&nvic->systick, sizeof(nvic->systick), TYPE_SYSTICK); + qdev_set_parent_bus(DEVICE(&nvic->systick), sysbus_get_default()); + + sysbus_init_irq(sbd, &nvic->excpout); qdev_init_gpio_out_named(dev, &nvic->sysresetreq, "SYSRESETREQ", 1); + qdev_init_gpio_in_named(dev, nvic_systick_trigger, "systick-trigger", 1); } static void armv7m_nvic_class_init(ObjectClass *klass, void *data) { - NVICClass *nc = NVIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - nc->parent_reset = dc->reset; - nc->parent_realize = dc->realize; dc->vmsd = &vmstate_nvic; + dc->props = props_nvic; dc->reset = armv7m_nvic_reset; dc->realize = armv7m_nvic_realize; } static const TypeInfo armv7m_nvic_info = { .name = TYPE_NVIC, - .parent = TYPE_ARM_GIC_COMMON, + .parent = TYPE_SYS_BUS_DEVICE, .instance_init = armv7m_nvic_instance_init, - .instance_size = sizeof(nvic_state), + .instance_size = sizeof(NVICState), .class_init = armv7m_nvic_class_init, - .class_size = sizeof(NVICClass), + .class_size = sizeof(SysBusDeviceClass), }; static void armv7m_nvic_register_types(void) diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h index 3f311740da..7fe87b13de 100644 --- a/hw/intc/gic_internal.h +++ b/hw/intc/gic_internal.h @@ -25,9 +25,7 @@ #define ALL_CPU_MASK ((unsigned)(((1 << GIC_NCPU) - 1))) -/* The NVIC has 16 internal vectors. However these are not exposed - through the normal GIC interface. */ -#define GIC_BASE_IRQ ((s->revision == REV_NVIC) ? 
32 : 0) +#define GIC_BASE_IRQ 0 #define GIC_SET_ENABLED(irq, cm) s->irq_state[irq].enabled |= (cm) #define GIC_CLEAR_ENABLED(irq, cm) s->irq_state[irq].enabled &= ~(cm) @@ -75,7 +73,6 @@ /* The special cases for the revision property: */ #define REV_11MPCORE 0 -#define REV_NVIC 0xffffffff void gic_set_pending_private(GICState *s, int cpu, int irq); uint32_t gic_acknowledge_irq(GICState *s, int cpu, MemTxAttrs attrs); @@ -87,7 +84,7 @@ void gic_set_priority(GICState *s, int cpu, int irq, uint8_t val, static inline bool gic_test_pending(GICState *s, int irq, int cm) { - if (s->revision == REV_NVIC || s->revision == REV_11MPCORE) { + if (s->revision == REV_11MPCORE) { return s->irq_state[irq].pending & cm; } else { /* Edge-triggered interrupts are marked pending on a rising edge, but diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h index aeb801d133..05303a55c8 100644 --- a/hw/intc/gicv3_internal.h +++ b/hw/intc/gicv3_internal.h @@ -138,6 +138,7 @@ #define ICC_CTLR_EL1_EOIMODE (1U << 1) #define ICC_CTLR_EL1_PMHE (1U << 6) #define ICC_CTLR_EL1_PRIBITS_SHIFT 8 +#define ICC_CTLR_EL1_PRIBITS_MASK (7U << ICC_CTLR_EL1_PRIBITS_SHIFT) #define ICC_CTLR_EL1_IDBITS_SHIFT 11 #define ICC_CTLR_EL1_SEIS (1U << 14) #define ICC_CTLR_EL1_A3V (1U << 15) @@ -407,4 +408,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) } } +void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ diff --git a/hw/intc/mips_gic.c b/hw/intc/mips_gic.c index 6e257730f8..15e6e40f9f 100644 --- a/hw/intc/mips_gic.c +++ b/hw/intc/mips_gic.c @@ -20,31 +20,29 @@ #include "kvm_mips.h" #include "hw/intc/mips_gic.h" -static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level) +static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin) { - int ored_level = level; + int ored_level = 0; int i; /* ORing pending registers sharing same pin */ - if (!ored_level) { - for (i = 0; i < gic->num_irq; i++) { - if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin && - gic->irq_state[i].map_vp == vp && - gic->irq_state[i].enabled) { - ored_level |= gic->irq_state[i].pending; - } - if (ored_level) { - /* no need to iterate all interrupts */ - break; - } + for (i = 0; i < gic->num_irq; i++) { + if ((gic->irq_state[i].map_pin & GIC_MAP_MSK) == pin && + gic->irq_state[i].map_vp == vp && + gic->irq_state[i].enabled) { + ored_level |= gic->irq_state[i].pending; } - if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) && - (gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) { - /* ORing with local pending register (count/compare) */ - ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >> - GIC_VP_MASK_CMP_SHF; + if (ored_level) { + /* no need to iterate all interrupts */ + break; } } + if (((gic->vps[vp].compare_map & GIC_MAP_MSK) == pin) && + (gic->vps[vp].mask & GIC_VP_MASK_CMP_MSK)) { + /* ORing with local pending register (count/compare) */ + ored_level |= (gic->vps[vp].pend & GIC_VP_MASK_CMP_MSK) >> + GIC_VP_MASK_CMP_SHF; + } if (kvm_enabled()) { kvm_mips_set_ipi_interrupt(mips_env_get_cpu(gic->vps[vp].env), pin + GIC_CPU_PIN_OFFSET, @@ -55,21 +53,27 @@ static void mips_gic_set_vp_irq(MIPSGICState *gic, int vp, int pin, int level) } } -static void gic_set_irq(void *opaque, int n_IRQ, int level) +static void gic_update_pin_for_irq(MIPSGICState *gic, int n_IRQ) { - MIPSGICState *gic = (MIPSGICState *) opaque; int vp = gic->irq_state[n_IRQ].map_vp; int pin = gic->irq_state[n_IRQ].map_pin & GIC_MAP_MSK; + if (vp < 0 || vp >= gic->num_vps) { + return; + } + 
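The reworked mips_gic_set_vp_irq() always recomputes the pin level as the OR of every enabled, pending source routed to that VP and pin, which is why the mask-register write paths now call gic_update_pin_for_irq() as well. A toy model of why the OR must be recomputed rather than simply lowering the pin; this is a standalone sketch, not QEMU code:

    #include <stdbool.h>
    #include <stdio.h>

    struct src { bool enabled, pending; int pin; };

    /* Pin level is the OR of all enabled, pending sources mapped to it. */
    static bool pin_level(const struct src *s, int n, int pin)
    {
        bool level = false;
        for (int i = 0; i < n; i++) {
            level |= s[i].enabled && s[i].pending && s[i].pin == pin;
        }
        return level;
    }

    int main(void)
    {
        struct src irqs[2] = {
            { .enabled = true, .pending = true, .pin = 3 },
            { .enabled = true, .pending = true, .pin = 3 },
        };

        printf("%d\n", pin_level(irqs, 2, 3)); /* 1: both sources assert pin 3 */
        irqs[0].enabled = false;
        printf("%d\n", pin_level(irqs, 2, 3)); /* still 1: the other source holds it */
        return 0;
    }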
mips_gic_set_vp_irq(gic, vp, pin); +} + +static void gic_set_irq(void *opaque, int n_IRQ, int level) +{ + MIPSGICState *gic = (MIPSGICState *) opaque; + gic->irq_state[n_IRQ].pending = (uint8_t) level; if (!gic->irq_state[n_IRQ].enabled) { /* GIC interrupt source disabled */ return; } - if (vp < 0 || vp >= gic->num_vps) { - return; - } - mips_gic_set_vp_irq(gic, vp, pin, level); + gic_update_pin_for_irq(gic, n_IRQ); } #define OFFSET_CHECK(c) \ @@ -209,7 +213,7 @@ static void gic_timer_store_vp_compare(MIPSGICState *gic, uint32_t vp_index, gic->vps[vp_index].pend &= ~(1 << GIC_LOCAL_INT_COMPARE); if (gic->vps[vp_index].compare_map & GIC_MAP_TO_PIN_MSK) { uint32_t pin = (gic->vps[vp_index].compare_map & GIC_MAP_MSK); - mips_gic_set_vp_irq(gic, vp_index, pin, 0); + mips_gic_set_vp_irq(gic, vp_index, pin); } mips_gictimer_store_vp_compare(gic->gic_timer, vp_index, compare); } @@ -286,6 +290,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) OFFSET_CHECK((base + size * 8) <= gic->num_irq); for (i = 0; i < size * 8; i++) { gic->irq_state[base + i].enabled &= !((data >> i) & 1); + gic_update_pin_for_irq(gic, base + i); } break; case GIC_SH_WEDGE_OFS: @@ -305,6 +310,7 @@ static void gic_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) OFFSET_CHECK((base + size * 8) <= gic->num_irq); for (i = 0; i < size * 8; i++) { gic->irq_state[base + i].enabled |= (data >> i) & 1; + gic_update_pin_for_irq(gic, base + i); } break; case GIC_SH_MAP0_PIN_OFS ... GIC_SH_MAP255_PIN_OFS: diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index 6ab29efc65..bef4caf980 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -16,6 +16,8 @@ #include "migration/qemu-file.h" #include "hw/s390x/s390_flic.h" #include "trace.h" +#include "hw/qdev.h" +#include "qapi/error.h" S390FLICState *s390_get_flic(void) { @@ -85,6 +87,30 @@ static void qemu_s390_flic_class_init(ObjectClass *oc, void *data) fsc->clear_io_irq = qemu_s390_clear_io_flic; } +static Property s390_flic_common_properties[] = { + DEFINE_PROP_UINT32("adapter_routes_max_batch", S390FLICState, + adapter_routes_max_batch, ADAPTER_ROUTES_MAX_GSI), + DEFINE_PROP_END_OF_LIST(), +}; + +static void s390_flic_common_realize(DeviceState *dev, Error **errp) +{ + uint32_t max_batch = S390_FLIC_COMMON(dev)->adapter_routes_max_batch; + + if (max_batch > ADAPTER_ROUTES_MAX_GSI) { + error_setg(errp, "flic adapter_routes_max_batch too big" + "%d (%d allowed)", max_batch, ADAPTER_ROUTES_MAX_GSI); + } +} + +static void s390_flic_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->props = s390_flic_common_properties; + dc->realize = s390_flic_common_realize; +} + static const TypeInfo qemu_s390_flic_info = { .name = TYPE_QEMU_S390_FLIC, .parent = TYPE_S390_FLIC_COMMON, @@ -92,10 +118,12 @@ static const TypeInfo qemu_s390_flic_info = { .class_init = qemu_s390_flic_class_init, }; + static const TypeInfo s390_flic_common_info = { .name = TYPE_S390_FLIC_COMMON, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(S390FLICState), + .class_init = s390_flic_class_init, .class_size = sizeof(S390FLICStateClass), }; diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c index e86a84e49a..cc44bc4e1e 100644 --- a/hw/intc/s390_flic_kvm.c +++ b/hw/intc/s390_flic_kvm.c @@ -293,6 +293,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size, int len = FLIC_SAVE_INITIAL_SIZE; void *buf; int count; + int r = 0; flic_disable_wait_pfault((struct KVMS390FLICState *) opaque); @@ -303,7 +304,7 
@@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size, * migration state */ error_report("flic: couldn't allocate memory"); qemu_put_be64(f, FLIC_FAILED); - return 0; + return -ENOMEM; } count = __get_all_irqs(flic, &buf, len); @@ -314,6 +315,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size, * target system to fail when attempting to load irqs from the * migration state */ qemu_put_be64(f, FLIC_FAILED); + r = count; } else { qemu_put_be64(f, count); qemu_put_buffer(f, (uint8_t *) buf, @@ -321,7 +323,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size, } g_free(buf); - return 0; + return r; } /** diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 39a538d048..729c1288f1 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -161,3 +161,18 @@ gicv3_redist_write(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, gicv3_redist_badwrite(uint32_t cpu, uint64_t offset, uint64_t data, unsigned size, bool secure) "GICv3 redistributor %x write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u secure %d: error" gicv3_redist_set_irq(uint32_t cpu, int irq, int level) "GICv3 redistributor %x interrupt %d level changed to %d" gicv3_redist_send_sgi(uint32_t cpu, int irq) "GICv3 redistributor %x pending SGI %d" + +# hw/intc/armv7m_nvic.c +nvic_recompute_state(int vectpending, int exception_prio) "NVIC state recomputed: vectpending %d exception_prio %d" +nvic_set_prio(int irq, uint8_t prio) "NVIC set irq %d priority %d" +nvic_irq_update(int vectpending, int pendprio, int exception_prio, int level) "NVIC vectpending %d pending prio %d exception_prio %d: setting irq line to %d" +nvic_escalate_prio(int irq, int irqprio, int runprio) "NVIC escalating irq %d to HardFault: insufficient priority %d >= %d" +nvic_escalate_disabled(int irq) "NVIC escalating irq %d to HardFault: disabled" +nvic_set_pending(int irq, int en, int prio) "NVIC set pending irq %d (enabled: %d priority %d)" +nvic_clear_pending(int irq, int en, int prio) "NVIC clear pending irq %d (enabled: %d priority %d)" +nvic_set_pending_level(int irq) "NVIC set pending: irq %d higher prio than vectpending: setting irq line to 1" +nvic_acknowledge_irq(int irq, int prio) "NVIC acknowledge IRQ: %d now active (prio %d)" +nvic_complete_irq(int irq) "NVIC complete IRQ %d" +nvic_set_irq_level(int irq, int level) "NVIC external irq %d level set to %d" +nvic_sysreg_read(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +nvic_sysreg_write(uint64_t addr, uint32_t value, unsigned size) "NVIC sysreg write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/intc/xics.c b/hw/intc/xics.c index 095c16a300..ffc0747c7f 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -49,40 +49,41 @@ int xics_get_cpu_index_by_dt_id(int cpu_dt_id) return -1; } -void xics_cpu_destroy(XICSState *xics, PowerPCCPU *cpu) +void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); - ICPState *ss = &xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(xi, cs->cpu_index); - assert(cs->cpu_index < xics->nr_servers); - assert(cs == ss->cs); + assert(icp); + assert(cs == icp->cs); - ss->output = NULL; - ss->cs = NULL; + icp->output = NULL; + icp->cs = NULL; } -void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) +void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - ICPState *ss = &xics->ss[cs->cpu_index]; - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); + 
ICPState *icp = xics_icp_get(xi, cs->cpu_index); + ICPStateClass *icpc; - assert(cs->cpu_index < xics->nr_servers); + assert(icp); - ss->cs = cs; + icp->cs = cs; - if (info->cpu_setup) { - info->cpu_setup(xics, cpu); + icpc = ICP_GET_CLASS(icp); + if (icpc->cpu_setup) { + icpc->cpu_setup(icp, cpu); } switch (PPC_INPUT(env)) { case PPC_FLAGS_INPUT_POWER7: - ss->output = env->irq_inputs[POWER7_INPUT_INT]; + icp->output = env->irq_inputs[POWER7_INPUT_INT]; break; case PPC_FLAGS_INPUT_970: - ss->output = env->irq_inputs[PPC970_INPUT_INT]; + icp->output = env->irq_inputs[PPC970_INPUT_INT]; break; default: @@ -92,185 +93,43 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) } } -static void xics_common_pic_print_info(InterruptStatsProvider *obj, - Monitor *mon) +void icp_pic_print_info(ICPState *icp, Monitor *mon) { - XICSState *xics = XICS_COMMON(obj); - ICSState *ics; - uint32_t i; - - for (i = 0; i < xics->nr_servers; i++) { - ICPState *icp = &xics->ss[i]; - - if (!icp->output) { - continue; - } - monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", - i, icp->xirr, icp->xirr_owner, - icp->pending_priority, icp->mfrr); - } - - QLIST_FOREACH(ics, &xics->ics, list) { - monitor_printf(mon, "ICS %4x..%4x %p\n", - ics->offset, ics->offset + ics->nr_irqs - 1, ics); - - if (!ics->irqs) { - continue; - } - - for (i = 0; i < ics->nr_irqs; i++) { - ICSIRQState *irq = ics->irqs + i; - - if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { - continue; - } - monitor_printf(mon, " %4x %s %02x %02x\n", - ics->offset + i, - (irq->flags & XICS_FLAGS_IRQ_LSI) ? - "LSI" : "MSI", - irq->priority, irq->status); - } - } -} - -/* - * XICS Common class - parent for emulated XICS and KVM-XICS - */ -static void xics_common_reset(DeviceState *d) -{ - XICSState *xics = XICS_COMMON(d); - ICSState *ics; - int i; - - for (i = 0; i < xics->nr_servers; i++) { - device_reset(DEVICE(&xics->ss[i])); - } - - QLIST_FOREACH(ics, &xics->ics, list) { - device_reset(DEVICE(ics)); - } -} - -static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - int64_t value = xics->nr_irqs; + int cpu_index = icp->cs ? 
icp->cs->cpu_index : -1; - visit_type_int(v, name, &value, errp); -} - -static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); - Error *error = NULL; - int64_t value; - - visit_type_int(v, name, &value, &error); - if (error) { - error_propagate(errp, error); + if (!icp->output) { return; } - if (xics->nr_irqs) { - error_setg(errp, "Number of interrupts is already set to %u", - xics->nr_irqs); - return; - } - - assert(info->set_nr_irqs); - info->set_nr_irqs(xics, value, errp); -} - -void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, - const char *typename, Error **errp) -{ - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char name[32]; - ICPState *icp = &xics->ss[i]; - - object_initialize(icp, sizeof(*icp), typename); - snprintf(name, sizeof(name), "icp[%d]", i); - object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp); - icp->xics = xics; - } + monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", + cpu_index, icp->xirr, icp->xirr_owner, + icp->pending_priority, icp->mfrr); } -static void xics_prop_get_nr_servers(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) +void ics_pic_print_info(ICSState *ics, Monitor *mon) { - XICSState *xics = XICS_COMMON(obj); - int64_t value = xics->nr_servers; - - visit_type_int(v, name, &value, errp); -} + uint32_t i; -static void xics_prop_set_nr_servers(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) -{ - XICSState *xics = XICS_COMMON(obj); - XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics); - Error *error = NULL; - int64_t value; + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); - visit_type_int(v, name, &value, &error); - if (error) { - error_propagate(errp, error); + if (!ics->irqs) { return; } - if (xics->nr_servers) { - error_setg(errp, "Number of servers is already set to %u", - xics->nr_servers); - return; - } - - assert(xsc->set_nr_servers); - xsc->set_nr_servers(xics, value, errp); -} - -static void xics_common_initfn(Object *obj) -{ - XICSState *xics = XICS_COMMON(obj); - QLIST_INIT(&xics->ics); - object_property_add(obj, "nr_irqs", "int", - xics_prop_get_nr_irqs, xics_prop_set_nr_irqs, - NULL, NULL, NULL); - object_property_add(obj, "nr_servers", "int", - xics_prop_get_nr_servers, xics_prop_set_nr_servers, - NULL, NULL, NULL); -} - -static void xics_common_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc); + for (i = 0; i < ics->nr_irqs; i++) { + ICSIRQState *irq = ics->irqs + i; - dc->reset = xics_common_reset; - ic->print_info = xics_common_pic_print_info; + if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { + continue; + } + monitor_printf(mon, " %4x %s %02x %02x\n", + ics->offset + i, + (irq->flags & XICS_FLAGS_IRQ_LSI) ? 
+ "LSI" : "MSI", + irq->priority, irq->status); + } } -static const TypeInfo xics_common_info = { - .name = TYPE_XICS_COMMON, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(XICSState), - .class_size = sizeof(XICSStateClass), - .instance_init = xics_common_initfn, - .class_init = xics_common_class_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, -}; - /* * ICP: Presentation layer */ @@ -278,8 +137,8 @@ static const TypeInfo xics_common_info = { #define XISR_MASK 0x00ffffff #define CPPR_MASK 0xff000000 -#define XISR(ss) (((ss)->xirr) & XISR_MASK) -#define CPPR(ss) (((ss)->xirr) >> 24) +#define XISR(icp) (((icp)->xirr) & XISR_MASK) +#define CPPR(icp) (((icp)->xirr) >> 24) static void ics_reject(ICSState *ics, uint32_t nr) { @@ -290,7 +149,7 @@ static void ics_reject(ICSState *ics, uint32_t nr) } } -static void ics_resend(ICSState *ics) +void ics_resend(ICSState *ics) { ICSStateClass *k = ICS_BASE_GET_CLASS(ics); @@ -308,151 +167,152 @@ static void ics_eoi(ICSState *ics, int nr) } } -static void icp_check_ipi(ICPState *ss) +static void icp_check_ipi(ICPState *icp) { - if (XISR(ss) && (ss->pending_priority <= ss->mfrr)) { + if (XISR(icp) && (icp->pending_priority <= icp->mfrr)) { return; } - trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr); + trace_xics_icp_check_ipi(icp->cs->cpu_index, icp->mfrr); - if (XISR(ss) && ss->xirr_owner) { - ics_reject(ss->xirr_owner, XISR(ss)); + if (XISR(icp) && icp->xirr_owner) { + ics_reject(icp->xirr_owner, XISR(icp)); } - ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI; - ss->pending_priority = ss->mfrr; - ss->xirr_owner = NULL; - qemu_irq_raise(ss->output); + icp->xirr = (icp->xirr & ~XISR_MASK) | XICS_IPI; + icp->pending_priority = icp->mfrr; + icp->xirr_owner = NULL; + qemu_irq_raise(icp->output); } -static void icp_resend(ICPState *ss) +void icp_resend(ICPState *icp) { - ICSState *ics; + XICSFabric *xi = icp->xics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); - if (ss->mfrr < CPPR(ss)) { - icp_check_ipi(ss); - } - QLIST_FOREACH(ics, &ss->xics->ics, list) { - ics_resend(ics); + if (icp->mfrr < CPPR(icp)) { + icp_check_ipi(icp); } + + xic->ics_resend(xi); } -void icp_set_cppr(ICPState *ss, uint8_t cppr) +void icp_set_cppr(ICPState *icp, uint8_t cppr) { uint8_t old_cppr; uint32_t old_xisr; - old_cppr = CPPR(ss); - ss->xirr = (ss->xirr & ~CPPR_MASK) | (cppr << 24); + old_cppr = CPPR(icp); + icp->xirr = (icp->xirr & ~CPPR_MASK) | (cppr << 24); if (cppr < old_cppr) { - if (XISR(ss) && (cppr <= ss->pending_priority)) { - old_xisr = XISR(ss); - ss->xirr &= ~XISR_MASK; /* Clear XISR */ - ss->pending_priority = 0xff; - qemu_irq_lower(ss->output); - if (ss->xirr_owner) { - ics_reject(ss->xirr_owner, old_xisr); - ss->xirr_owner = NULL; + if (XISR(icp) && (cppr <= icp->pending_priority)) { + old_xisr = XISR(icp); + icp->xirr &= ~XISR_MASK; /* Clear XISR */ + icp->pending_priority = 0xff; + qemu_irq_lower(icp->output); + if (icp->xirr_owner) { + ics_reject(icp->xirr_owner, old_xisr); + icp->xirr_owner = NULL; } } } else { - if (!XISR(ss)) { - icp_resend(ss); + if (!XISR(icp)) { + icp_resend(icp); } } } -void icp_set_mfrr(ICPState *ss, uint8_t mfrr) +void icp_set_mfrr(ICPState *icp, uint8_t mfrr) { - ss->mfrr = mfrr; - if (mfrr < CPPR(ss)) { - icp_check_ipi(ss); + icp->mfrr = mfrr; + if (mfrr < CPPR(icp)) { + icp_check_ipi(icp); } } -uint32_t icp_accept(ICPState *ss) +uint32_t icp_accept(ICPState *icp) { - uint32_t xirr = ss->xirr; + uint32_t xirr = icp->xirr; - qemu_irq_lower(ss->output); - ss->xirr = 
ss->pending_priority << 24; - ss->pending_priority = 0xff; - ss->xirr_owner = NULL; + qemu_irq_lower(icp->output); + icp->xirr = icp->pending_priority << 24; + icp->pending_priority = 0xff; + icp->xirr_owner = NULL; - trace_xics_icp_accept(xirr, ss->xirr); + trace_xics_icp_accept(xirr, icp->xirr); return xirr; } -uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr) +uint32_t icp_ipoll(ICPState *icp, uint32_t *mfrr) { if (mfrr) { - *mfrr = ss->mfrr; + *mfrr = icp->mfrr; } - return ss->xirr; + return icp->xirr; } -void icp_eoi(ICPState *ss, uint32_t xirr) +void icp_eoi(ICPState *icp, uint32_t xirr) { + XICSFabric *xi = icp->xics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); ICSState *ics; uint32_t irq; /* Send EOI -> ICS */ - ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); - trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr); + icp->xirr = (icp->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); + trace_xics_icp_eoi(icp->cs->cpu_index, xirr, icp->xirr); irq = xirr & XISR_MASK; - QLIST_FOREACH(ics, &ss->xics->ics, list) { - if (ics_valid_irq(ics, irq)) { - ics_eoi(ics, irq); - } + + ics = xic->ics_get(xi, irq); + if (ics) { + ics_eoi(ics, irq); } - if (!XISR(ss)) { - icp_resend(ss); + if (!XISR(icp)) { + icp_resend(icp); } } static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { - XICSState *xics = ics->xics; - ICPState *ss = xics->ss + server; + ICPState *icp = xics_icp_get(ics->xics, server); trace_xics_icp_irq(server, nr, priority); - if ((priority >= CPPR(ss)) - || (XISR(ss) && (ss->pending_priority <= priority))) { + if ((priority >= CPPR(icp)) + || (XISR(icp) && (icp->pending_priority <= priority))) { ics_reject(ics, nr); } else { - if (XISR(ss) && ss->xirr_owner) { - ics_reject(ss->xirr_owner, XISR(ss)); - ss->xirr_owner = NULL; + if (XISR(icp) && icp->xirr_owner) { + ics_reject(icp->xirr_owner, XISR(icp)); + icp->xirr_owner = NULL; } - ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK); - ss->xirr_owner = ics; - ss->pending_priority = priority; - trace_xics_icp_raise(ss->xirr, ss->pending_priority); - qemu_irq_raise(ss->output); + icp->xirr = (icp->xirr & ~XISR_MASK) | (nr & XISR_MASK); + icp->xirr_owner = ics; + icp->pending_priority = priority; + trace_xics_icp_raise(icp->xirr, icp->pending_priority); + qemu_irq_raise(icp->output); } } static void icp_dispatch_pre_save(void *opaque) { - ICPState *ss = opaque; - ICPStateClass *info = ICP_GET_CLASS(ss); + ICPState *icp = opaque; + ICPStateClass *info = ICP_GET_CLASS(icp); if (info->pre_save) { - info->pre_save(ss); + info->pre_save(icp); } } static int icp_dispatch_post_load(void *opaque, int version_id) { - ICPState *ss = opaque; - ICPStateClass *info = ICP_GET_CLASS(ss); + ICPState *icp = opaque; + ICPStateClass *info = ICP_GET_CLASS(icp); if (info->post_load) { - return info->post_load(ss, version_id); + return info->post_load(icp, version_id); } return 0; @@ -485,12 +345,30 @@ static void icp_reset(DeviceState *dev) qemu_set_irq(icp->output, 0); } +static void icp_realize(DeviceState *dev, Error **errp) +{ + ICPState *icp = ICP(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xics", &err); + if (!obj) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(err)); + return; + } + + icp->xics = XICS_FABRIC(obj); +} + + static void icp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->reset = icp_reset; dc->vmsd = &vmstate_icp_server; + dc->realize = icp_realize; } static const TypeInfo 
icp_info = { @@ -663,17 +541,6 @@ static void ics_simple_reset(DeviceState *dev) } } -static int ics_simple_post_load(ICSState *ics, int version_id) -{ - int i; - - for (i = 0; i < ics->xics->nr_servers; i++) { - icp_resend(&ics->xics->ss[i]); - } - - return 0; -} - static void ics_simple_dispatch_pre_save(void *opaque) { ICSState *ics = opaque; @@ -746,15 +613,20 @@ static void ics_simple_realize(DeviceState *dev, Error **errp) ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs); } +static Property ics_simple_properties[] = { + DEFINE_PROP_UINT32("nr-irqs", ICSState, nr_irqs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + static void ics_simple_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); ICSStateClass *isc = ICS_BASE_CLASS(klass); - dc->realize = ics_simple_realize; + isc->realize = ics_simple_realize; + dc->props = ics_simple_properties; dc->vmsd = &vmstate_ics_simple; dc->reset = ics_simple_reset; - isc->post_load = ics_simple_post_load; isc->reject = ics_simple_reject; isc->resend = ics_simple_resend; isc->eoi = ics_simple_eoi; @@ -769,38 +641,69 @@ static const TypeInfo ics_simple_info = { .instance_init = ics_simple_initfn, }; +static void ics_base_realize(DeviceState *dev, Error **errp) +{ + ICSStateClass *icsc = ICS_BASE_GET_CLASS(dev); + ICSState *ics = ICS_BASE(dev); + Object *obj; + Error *err = NULL; + + obj = object_property_get_link(OBJECT(dev), "xics", &err); + if (!obj) { + error_setg(errp, "%s: required link 'xics' not found: %s", + __func__, error_get_pretty(err)); + return; + } + ics->xics = XICS_FABRIC(obj); + + + if (icsc->realize) { + icsc->realize(dev, errp); + } +} + +static void ics_base_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ics_base_realize; +} + static const TypeInfo ics_base_info = { .name = TYPE_ICS_BASE, .parent = TYPE_DEVICE, .abstract = true, .instance_size = sizeof(ICSState), + .class_init = ics_base_class_init, .class_size = sizeof(ICSStateClass), }; +static const TypeInfo xics_fabric_info = { + .name = TYPE_XICS_FABRIC, + .parent = TYPE_INTERFACE, + .class_size = sizeof(XICSFabricClass), +}; + /* * Exported functions */ -ICSState *xics_find_source(XICSState *xics, int irq) +qemu_irq xics_get_qirq(XICSFabric *xi, int irq) { - ICSState *ics; + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); + ICSState *ics = xic->ics_get(xi, irq); - QLIST_FOREACH(ics, &xics->ics, list) { - if (ics_valid_irq(ics, irq)) { - return ics; - } + if (ics) { + return ics->qirqs[irq - ics->offset]; } + return NULL; } -qemu_irq xics_get_qirq(XICSState *xics, int irq) +ICPState *xics_icp_get(XICSFabric *xi, int server) { - ICSState *ics = xics_find_source(xics, irq); + XICSFabricClass *xic = XICS_FABRIC_GET_CLASS(xi); - if (ics) { - return ics->qirqs[irq - ics->offset]; - } - - return NULL; + return xic->icp_get(xi, server); } void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) @@ -813,10 +716,10 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) static void xics_register_types(void) { - type_register_static(&xics_common_info); type_register_static(&ics_simple_info); type_register_static(&ics_base_info); type_register_static(&icp_info); + type_register_static(&xics_fabric_info); } type_init(xics_register_types) diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index 17694eaa87..0a3daca3bb 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -40,16 +40,12 @@ #include <sys/ioctl.h> -typedef struct KVMXICSState { - XICSState parent_obj; - - int 
kernel_xics_fd; -} KVMXICSState; +static int kernel_xics_fd = -1; /* * ICP-KVM */ -static void icp_get_kvm_state(ICPState *ss) +static void icp_get_kvm_state(ICPState *icp) { uint64_t state; struct kvm_one_reg reg = { @@ -59,25 +55,25 @@ static void icp_get_kvm_state(ICPState *ss) int ret; /* ICP for this CPU thread is not in use, exiting */ - if (!ss->cs) { + if (!icp->cs) { return; } - ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, ®); + ret = kvm_vcpu_ioctl(icp->cs, KVM_GET_ONE_REG, ®); if (ret != 0) { error_report("Unable to retrieve KVM interrupt controller state" - " for CPU %ld: %s", kvm_arch_vcpu_id(ss->cs), strerror(errno)); + " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno)); exit(1); } - ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; - ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) + icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT; + icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK; - ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) + icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK; } -static int icp_set_kvm_state(ICPState *ss, int version_id) +static int icp_set_kvm_state(ICPState *icp, int version_id) { uint64_t state; struct kvm_one_reg reg = { @@ -87,18 +83,18 @@ static int icp_set_kvm_state(ICPState *ss, int version_id) int ret; /* ICP for this CPU thread is not in use, exiting */ - if (!ss->cs) { + if (!icp->cs) { return 0; } - state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) - | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) - | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); + state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT) + | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) + | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT); - ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, ®); + ret = kvm_vcpu_ioctl(icp->cs, KVM_SET_ONE_REG, ®); if (ret != 0) { error_report("Unable to restore KVM interrupt controller state (0x%" - PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(ss->cs), + PRIx64 ") for CPU %ld: %s", state, kvm_arch_vcpu_id(icp->cs), strerror(errno)); return ret; } @@ -122,6 +118,34 @@ static void icp_kvm_reset(DeviceState *dev) icp_set_kvm_state(icp, 1); } +static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + int ret; + + if (kernel_xics_fd == -1) { + abort(); + } + + /* + * If we are reusing a parked vCPU fd corresponding to the CPU + * which was hot-removed earlier we don't have to renable + * KVM_CAP_IRQ_XICS capability again. 
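+     * In that case icp->cap_irq_xics_enabled is already set and the check
+     * below returns early.  Otherwise kvm_vcpu_enable_cap() attaches this
+     * vCPU to the in-kernel XICS device through the file-scope
+     * kernel_xics_fd, and the flag is recorded afterwards so a later
+     * setup call for the same ICP becomes a no-op.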
+ */ + if (icp->cap_irq_xics_enabled) { + return; + } + + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, + kvm_arch_vcpu_id(cs)); + if (ret < 0) { + error_report("Unable to connect CPU%ld to kernel XICS: %s", + kvm_arch_vcpu_id(cs), strerror(errno)); + exit(1); + } + icp->cap_irq_xics_enabled = true; +} + static void icp_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -130,6 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void *data) dc->reset = icp_kvm_reset; icpc->pre_save = icp_get_kvm_state; icpc->post_load = icp_set_kvm_state; + icpc->cpu_setup = icp_kvm_cpu_setup; } static const TypeInfo icp_kvm_info = { @@ -145,7 +170,6 @@ static const TypeInfo icp_kvm_info = { */ static void ics_get_kvm_state(ICSState *ics) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics); uint64_t state; struct kvm_device_attr attr = { .flags = 0, @@ -160,7 +184,7 @@ static void ics_get_kvm_state(ICSState *ics) attr.attr = i + ics->offset; - ret = ioctl(xicskvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); + ret = ioctl(kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr); if (ret != 0) { error_report("Unable to retrieve KVM interrupt controller state" " for IRQ %d: %s", i + ics->offset, strerror(errno)); @@ -204,7 +228,6 @@ static void ics_get_kvm_state(ICSState *ics) static int ics_set_kvm_state(ICSState *ics, int version_id) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(ics->xics); uint64_t state; struct kvm_device_attr attr = { .flags = 0, @@ -238,7 +261,7 @@ static int ics_set_kvm_state(ICSState *ics, int version_id) } } - ret = ioctl(xicskvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); + ret = ioctl(kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr); if (ret != 0) { error_report("Unable to restore KVM interrupt controller state" " for IRQs %d: %s", i + ics->offset, strerror(errno)); @@ -308,7 +331,7 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); ICSStateClass *icsc = ICS_BASE_CLASS(klass); - dc->realize = ics_kvm_realize; + icsc->realize = ics_kvm_realize; dc->reset = ics_kvm_reset; icsc->pre_save = ics_get_kvm_state; icsc->post_load = ics_set_kvm_state; @@ -324,57 +347,6 @@ static const TypeInfo ics_kvm_info = { /* * XICS-KVM */ -static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu) -{ - CPUState *cs; - ICPState *ss; - KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics); - int ret; - - cs = CPU(cpu); - ss = &xics->ss[cs->cpu_index]; - - assert(cs->cpu_index < xics->nr_servers); - if (xicskvm->kernel_xics_fd == -1) { - abort(); - } - - /* - * If we are reusing a parked vCPU fd corresponding to the CPU - * which was hot-removed earlier we don't have to renable - * KVM_CAP_IRQ_XICS capability again. - */ - if (ss->cap_irq_xics_enabled) { - return; - } - - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd, - kvm_arch_vcpu_id(cs)); - if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); - exit(1); - } - ss->cap_irq_xics_enabled = true; -} - -static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, - Error **errp) -{ - ICSState *ics = QLIST_FIRST(&xics->ics); - - /* This needs to be deprecated ... 
*/ - xics->nr_irqs = nr_irqs; - if (ics) { - ics->nr_irqs = nr_irqs; - } -} - -static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers, - Error **errp) -{ - xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp); -} static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t token, @@ -385,13 +357,9 @@ static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, __func__); } -static void xics_kvm_realize(DeviceState *dev, Error **errp) +int xics_kvm_init(sPAPRMachineState *spapr, Error **errp) { - KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev); - XICSState *xics = XICS_COMMON(dev); - ICSState *ics; - int i, rc; - Error *error = NULL; + int rc; struct kvm_create_device xics_create_device = { .type = KVM_DEV_TYPE_XICS, .flags = 0, @@ -439,72 +407,24 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) goto fail; } - xicskvm->kernel_xics_fd = xics_create_device.fd; - - QLIST_FOREACH(ics, &xics->ics, list) { - object_property_set_bool(OBJECT(ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - goto fail; - } - } - - assert(xics->nr_servers); - for (i = 0; i < xics->nr_servers; i++) { - object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", - &error); - if (error) { - error_propagate(errp, error); - goto fail; - } - } + kernel_xics_fd = xics_create_device.fd; kvm_kernel_irqchip = true; kvm_msi_via_irqfd_allowed = true; kvm_gsi_direct_mapping = true; - return; + return rc; fail: kvmppc_define_rtas_kernel_token(0, "ibm,set-xive"); kvmppc_define_rtas_kernel_token(0, "ibm,get-xive"); kvmppc_define_rtas_kernel_token(0, "ibm,int-on"); kvmppc_define_rtas_kernel_token(0, "ibm,int-off"); + return -1; } -static void xics_kvm_initfn(Object *obj) -{ - XICSState *xics = XICS_COMMON(obj); - ICSState *ics; - - ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM)); - object_property_add_child(obj, "ics", OBJECT(ics), NULL); - ics->xics = xics; - QLIST_INSERT_HEAD(&xics->ics, ics, list); -} - -static void xics_kvm_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - XICSStateClass *xsc = XICS_COMMON_CLASS(oc); - - dc->realize = xics_kvm_realize; - xsc->cpu_setup = xics_kvm_cpu_setup; - xsc->set_nr_irqs = xics_kvm_set_nr_irqs; - xsc->set_nr_servers = xics_kvm_set_nr_servers; -} - -static const TypeInfo xics_spapr_kvm_info = { - .name = TYPE_XICS_SPAPR_KVM, - .parent = TYPE_XICS_COMMON, - .instance_size = sizeof(KVMXICSState), - .class_init = xics_kvm_class_init, - .instance_init = xics_kvm_initfn, -}; - static void xics_kvm_register_types(void) { - type_register_static(&xics_spapr_kvm_info); type_register_static(&ics_kvm_info); type_register_static(&icp_kvm_info); } diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 2e3f1c5e95..84d24b2837 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -44,7 +44,7 @@ static target_ulong h_cppr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); target_ulong cppr = args[0]; icp_set_cppr(icp, cppr); @@ -56,12 +56,13 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, { target_ulong server = xics_get_cpu_index_by_dt_id(args[0]); target_ulong mfrr = args[1]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), server); - if (server >= spapr->xics->nr_servers) { + if (!icp) { return H_PARAMETER; } - icp_set_mfrr(spapr->xics->ss + server, mfrr); + 
icp_set_mfrr(icp, mfrr); return H_SUCCESS; } @@ -69,7 +70,7 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t xirr = icp_accept(icp); args[0] = xirr; @@ -80,7 +81,7 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t xirr = icp_accept(icp); args[0] = xirr; @@ -92,7 +93,7 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); target_ulong xirr = args[0]; icp_eoi(icp, xirr); @@ -103,7 +104,7 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + ICPState *icp = xics_icp_get(XICS_FABRIC(spapr), cs->cpu_index); uint32_t mfrr; uint32_t xirr = icp_ipoll(icp, &mfrr); @@ -118,7 +119,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno, server, priority; if ((nargs != 3) || (nret != 1)) { @@ -134,7 +135,7 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1)); priority = rtas_ld(args, 2); - if (!ics_valid_irq(ics, nr) || (server >= ics->xics->nr_servers) + if (!ics_valid_irq(ics, nr) || !xics_icp_get(XICS_FABRIC(spapr), server) || (priority > 0xff)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; @@ -151,7 +152,7 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 3)) { @@ -181,7 +182,7 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { @@ -212,7 +213,7 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + ICSState *ics = spapr->ics; uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { @@ -239,36 +240,8 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, rtas_st(rets, 0, RTAS_OUT_SUCCESS); } -static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, - Error **errp) +int xics_spapr_init(sPAPRMachineState *spapr, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); - - /* This needs to be deprecated ... 
*/ - xics->nr_irqs = nr_irqs; - if (ics) { - ics->nr_irqs = nr_irqs; - } -} - -static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, - Error **errp) -{ - xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp); -} - -static void xics_spapr_realize(DeviceState *dev, Error **errp) -{ - XICSState *xics = XICS_SPAPR(dev); - ICSState *ics; - Error *error = NULL; - int i; - - if (!xics->nr_servers) { - error_setg(errp, "Number of servers needs to be greater 0"); - return; - } - /* Registration of global state belongs into realize */ spapr_rtas_register(RTAS_IBM_SET_XIVE, "ibm,set-xive", rtas_set_xive); spapr_rtas_register(RTAS_IBM_GET_XIVE, "ibm,get-xive", rtas_get_xive); @@ -281,55 +254,9 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) spapr_register_hypercall(H_XIRR_X, h_xirr_x); spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - - QLIST_FOREACH(ics, &xics->ics, list) { - object_property_set_bool(OBJECT(ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - return; - } - } - - for (i = 0; i < xics->nr_servers; i++) { - object_property_set_bool(OBJECT(&xics->ss[i]), true, "realized", - &error); - if (error) { - error_propagate(errp, error); - return; - } - } -} - -static void xics_spapr_initfn(Object *obj) -{ - XICSState *xics = XICS_SPAPR(obj); - ICSState *ics; - - ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE)); - object_property_add_child(obj, "ics", OBJECT(ics), NULL); - ics->xics = xics; - QLIST_INSERT_HEAD(&xics->ics, ics, list); -} - -static void xics_spapr_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - XICSStateClass *xsc = XICS_SPAPR_CLASS(oc); - - dc->realize = xics_spapr_realize; - xsc->set_nr_irqs = xics_spapr_set_nr_irqs; - xsc->set_nr_servers = xics_spapr_set_nr_servers; + return 0; } -static const TypeInfo xics_spapr_info = { - .name = TYPE_XICS_SPAPR, - .parent = TYPE_XICS_COMMON, - .instance_size = sizeof(XICSState), - .class_size = sizeof(XICSStateClass), - .class_init = xics_spapr_class_init, - .instance_init = xics_spapr_initfn, -}; - #define ICS_IRQ_FREE(ics, srcno) \ (!((ics)->irqs[(srcno)].flags & (XICS_FLAGS_IRQ_MASK))) @@ -354,9 +281,8 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum) return -1; } -int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) +int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); int irq; if (!ics) { @@ -387,10 +313,9 @@ int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) * Allocate block of consecutive IRQs, and return the number of the first IRQ in * the block. If align==true, aligns the first IRQ number to num. 
*/ -int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align, - Error **errp) +int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, + bool align, Error **errp) { - ICSState *ics = QLIST_FIRST(&xics->ics); int i, first = -1; if (!ics) { @@ -440,20 +365,18 @@ static void ics_free(ICSState *ics, int srcno, int num) } } -void xics_spapr_free(XICSState *xics, int irq, int num) +void spapr_ics_free(ICSState *ics, int irq, int num) { - ICSState *ics = xics_find_source(xics, irq); - - if (ics) { + if (ics_valid_irq(ics, irq)) { trace_xics_ics_free(0, irq, num); ics_free(ics, irq - ics->offset, num); } } -void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) +void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle) { uint32_t interrupt_server_ranges_prop[] = { - 0, cpu_to_be32(xics->nr_servers), + 0, cpu_to_be32(nr_servers), }; int node; @@ -470,10 +393,3 @@ void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle)); _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle)); } - -static void xics_spapr_register_types(void) -{ - type_register_static(&xics_spapr_info); -} - -type_init(xics_spapr_register_types) diff --git a/hw/mips/Makefile.objs b/hw/mips/Makefile.objs index 9352a1c062..48cd2ef50e 100644 --- a/hw/mips/Makefile.objs +++ b/hw/mips/Makefile.objs @@ -4,3 +4,4 @@ obj-$(CONFIG_JAZZ) += mips_jazz.o obj-$(CONFIG_FULONG) += mips_fulong2e.o obj-y += gt64xxx_pci.o obj-$(CONFIG_MIPS_CPS) += cps.o +obj-$(CONFIG_MIPS_BOSTON) += boston.o diff --git a/hw/mips/boston.c b/hw/mips/boston.c new file mode 100644 index 0000000000..83f7b82386 --- /dev/null +++ b/hw/mips/boston.c @@ -0,0 +1,577 @@ +/* + * MIPS Boston development board emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" + +#include "exec/address-spaces.h" +#include "hw/boards.h" +#include "hw/char/serial.h" +#include "hw/hw.h" +#include "hw/ide/pci.h" +#include "hw/ide/ahci.h" +#include "hw/loader.h" +#include "hw/loader-fit.h" +#include "hw/mips/cps.h" +#include "hw/mips/cpudevs.h" +#include "hw/pci-host/xilinx-pcie.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "sysemu/char.h" +#include "sysemu/device_tree.h" +#include "sysemu/sysemu.h" +#include "sysemu/qtest.h" + +#include <libfdt.h> + +#define TYPE_MIPS_BOSTON "mips-boston" +#define BOSTON(obj) OBJECT_CHECK(BostonState, (obj), TYPE_MIPS_BOSTON) + +typedef struct { + SysBusDevice parent_obj; + + MachineState *mach; + MIPSCPSState *cps; + SerialState *uart; + + CharBackend lcd_display; + char lcd_content[8]; + bool lcd_inited; + + hwaddr kernel_entry; + hwaddr fdt_base; +} BostonState; + +enum boston_plat_reg { + PLAT_FPGA_BUILD = 0x00, + PLAT_CORE_CL = 0x04, + PLAT_WRAPPER_CL = 0x08, + PLAT_SYSCLK_STATUS = 0x0c, + PLAT_SOFTRST_CTL = 0x10, +#define PLAT_SOFTRST_CTL_SYSRESET (1 << 4) + PLAT_DDR3_STATUS = 0x14, +#define PLAT_DDR3_STATUS_LOCKED (1 << 0) +#define PLAT_DDR3_STATUS_CALIBRATED (1 << 2) + PLAT_PCIE_STATUS = 0x18, +#define PLAT_PCIE_STATUS_PCIE0_LOCKED (1 << 0) +#define PLAT_PCIE_STATUS_PCIE1_LOCKED (1 << 8) +#define PLAT_PCIE_STATUS_PCIE2_LOCKED (1 << 16) + PLAT_FLASH_CTL = 0x1c, + PLAT_SPARE0 = 0x20, + PLAT_SPARE1 = 0x24, + PLAT_SPARE2 = 0x28, + PLAT_SPARE3 = 0x2c, + PLAT_MMCM_DIV = 0x30, +#define PLAT_MMCM_DIV_CLK0DIV_SHIFT 0 +#define PLAT_MMCM_DIV_INPUT_SHIFT 8 +#define PLAT_MMCM_DIV_MUL_SHIFT 16 +#define PLAT_MMCM_DIV_CLK1DIV_SHIFT 24 + PLAT_BUILD_CFG = 0x34, +#define PLAT_BUILD_CFG_IOCU_EN (1 << 0) +#define PLAT_BUILD_CFG_PCIE0_EN (1 << 1) +#define PLAT_BUILD_CFG_PCIE1_EN (1 << 2) +#define PLAT_BUILD_CFG_PCIE2_EN (1 << 3) + PLAT_DDR_CFG = 0x38, +#define PLAT_DDR_CFG_SIZE (0xf << 0) +#define PLAT_DDR_CFG_MHZ (0xfff << 4) + PLAT_NOC_PCIE0_ADDR = 0x3c, + PLAT_NOC_PCIE1_ADDR = 0x40, + PLAT_NOC_PCIE2_ADDR = 0x44, + PLAT_SYS_CTL = 0x48, +}; + +static void boston_lcd_event(void *opaque, int event) +{ + BostonState *s = opaque; + if (event == CHR_EVENT_OPENED && !s->lcd_inited) { + qemu_chr_fe_printf(&s->lcd_display, " "); + s->lcd_inited = true; + } +} + +static uint64_t boston_lcd_read(void *opaque, hwaddr addr, + unsigned size) +{ + BostonState *s = opaque; + uint64_t val = 0; + + switch (size) { + case 8: + val |= (uint64_t)s->lcd_content[(addr + 7) & 0x7] << 56; + val |= (uint64_t)s->lcd_content[(addr + 6) & 0x7] << 48; + val |= (uint64_t)s->lcd_content[(addr + 5) & 0x7] << 40; + val |= (uint64_t)s->lcd_content[(addr + 4) & 0x7] << 32; + /* fall through */ + case 4: + val |= (uint64_t)s->lcd_content[(addr + 3) & 0x7] << 24; + val |= (uint64_t)s->lcd_content[(addr + 2) & 0x7] << 16; + /* fall through */ + case 2: + val |= (uint64_t)s->lcd_content[(addr + 1) & 0x7] << 8; + /* fall through */ + case 1: + val |= (uint64_t)s->lcd_content[(addr + 0) & 0x7]; + break; + } + + return val; +} + +static void boston_lcd_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + BostonState *s = opaque; + + switch (size) { + case 8: + s->lcd_content[(addr + 7) & 0x7] = val >> 56; + s->lcd_content[(addr + 6) & 0x7] = val >> 48; + s->lcd_content[(addr + 5) & 0x7] = val >> 40; + s->lcd_content[(addr + 4) & 0x7] = val >> 32; + /* fall through */ + case 4: + s->lcd_content[(addr + 3) & 0x7] = val >> 24; + 
s->lcd_content[(addr + 2) & 0x7] = val >> 16; + /* fall through */ + case 2: + s->lcd_content[(addr + 1) & 0x7] = val >> 8; + /* fall through */ + case 1: + s->lcd_content[(addr + 0) & 0x7] = val; + break; + } + + qemu_chr_fe_printf(&s->lcd_display, + "\r%-8.8s", s->lcd_content); +} + +static const MemoryRegionOps boston_lcd_ops = { + .read = boston_lcd_read, + .write = boston_lcd_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static uint64_t boston_platreg_read(void *opaque, hwaddr addr, + unsigned size) +{ + BostonState *s = opaque; + uint32_t gic_freq, val; + + if (size != 4) { + qemu_log_mask(LOG_UNIMP, "%uB platform register read", size); + return 0; + } + + switch (addr & 0xffff) { + case PLAT_FPGA_BUILD: + case PLAT_CORE_CL: + case PLAT_WRAPPER_CL: + return 0; + case PLAT_DDR3_STATUS: + return PLAT_DDR3_STATUS_LOCKED | PLAT_DDR3_STATUS_CALIBRATED; + case PLAT_MMCM_DIV: + gic_freq = mips_gictimer_get_freq(s->cps->gic.gic_timer) / 1000000; + val = gic_freq << PLAT_MMCM_DIV_INPUT_SHIFT; + val |= 1 << PLAT_MMCM_DIV_MUL_SHIFT; + val |= 1 << PLAT_MMCM_DIV_CLK0DIV_SHIFT; + val |= 1 << PLAT_MMCM_DIV_CLK1DIV_SHIFT; + return val; + case PLAT_BUILD_CFG: + val = PLAT_BUILD_CFG_PCIE0_EN; + val |= PLAT_BUILD_CFG_PCIE1_EN; + val |= PLAT_BUILD_CFG_PCIE2_EN; + return val; + case PLAT_DDR_CFG: + val = s->mach->ram_size / G_BYTE; + assert(!(val & ~PLAT_DDR_CFG_SIZE)); + val |= PLAT_DDR_CFG_MHZ; + return val; + default: + qemu_log_mask(LOG_UNIMP, "Read platform register 0x%" HWADDR_PRIx, + addr & 0xffff); + return 0; + } +} + +static void boston_platreg_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + if (size != 4) { + qemu_log_mask(LOG_UNIMP, "%uB platform register write", size); + return; + } + + switch (addr & 0xffff) { + case PLAT_FPGA_BUILD: + case PLAT_CORE_CL: + case PLAT_WRAPPER_CL: + case PLAT_DDR3_STATUS: + case PLAT_PCIE_STATUS: + case PLAT_MMCM_DIV: + case PLAT_BUILD_CFG: + case PLAT_DDR_CFG: + /* read only */ + break; + case PLAT_SOFTRST_CTL: + if (val & PLAT_SOFTRST_CTL_SYSRESET) { + qemu_system_reset_request(); + } + break; + default: + qemu_log_mask(LOG_UNIMP, "Write platform register 0x%" HWADDR_PRIx + " = 0x%" PRIx64, addr & 0xffff, val); + break; + } +} + +static const MemoryRegionOps boston_platreg_ops = { + .read = boston_platreg_read, + .write = boston_platreg_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void boston_flash_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ +} + +static const MemoryRegionOps boston_flash_ops = { + .write = boston_flash_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const TypeInfo boston_device = { + .name = TYPE_MIPS_BOSTON, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BostonState), +}; + +static void boston_register_types(void) +{ + type_register_static(&boston_device); +} +type_init(boston_register_types) + +static void gen_firmware(uint32_t *p, hwaddr kernel_entry, hwaddr fdt_addr, + bool is_64b) +{ + const uint32_t cm_base = 0x16100000; + const uint32_t gic_base = 0x16120000; + const uint32_t cpc_base = 0x16200000; + + /* Move CM GCRs */ + if (is_64b) { + stl_p(p++, 0x40287803); /* dmfc0 $8, CMGCRBase */ + stl_p(p++, 0x00084138); /* dsll $8, $8, 4 */ + } else { + stl_p(p++, 0x40087803); /* mfc0 $8, CMGCRBase */ + stl_p(p++, 0x00084100); /* sll $8, $8, 4 */ + } + stl_p(p++, 0x3c09a000); /* lui $9, 0xa000 */ + stl_p(p++, 0x01094025); /* or $8, $9 */ + stl_p(p++, 0x3c0a0000 | (cm_base >> 16)); /* lui $10, cm_base >> 16 */ + if (is_64b) { + stl_p(p++, 0xfd0a0008); /* 
sd $10, 0x8($8) */ + } else { + stl_p(p++, 0xad0a0008); /* sw $10, 0x8($8) */ + } + stl_p(p++, 0x012a4025); /* or $8, $10 */ + + /* Move & enable GIC GCRs */ + stl_p(p++, 0x3c090000 | (gic_base >> 16)); /* lui $9, gic_base >> 16 */ + stl_p(p++, 0x35290001); /* ori $9, 0x1 */ + if (is_64b) { + stl_p(p++, 0xfd090080); /* sd $9, 0x80($8) */ + } else { + stl_p(p++, 0xad090080); /* sw $9, 0x80($8) */ + } + + /* Move & enable CPC GCRs */ + stl_p(p++, 0x3c090000 | (cpc_base >> 16)); /* lui $9, cpc_base >> 16 */ + stl_p(p++, 0x35290001); /* ori $9, 0x1 */ + if (is_64b) { + stl_p(p++, 0xfd090088); /* sd $9, 0x88($8) */ + } else { + stl_p(p++, 0xad090088); /* sw $9, 0x88($8) */ + } + + /* + * Setup argument registers to follow the UHI boot protocol: + * + * a0/$4 = -2 + * a1/$5 = virtual address of FDT + * a2/$6 = 0 + * a3/$7 = 0 + */ + stl_p(p++, 0x2404fffe); /* li $4, -2 */ + /* lui $5, hi(fdt_addr) */ + stl_p(p++, 0x3c050000 | ((fdt_addr >> 16) & 0xffff)); + if (fdt_addr & 0xffff) { /* ori $5, lo(fdt_addr) */ + stl_p(p++, 0x34a50000 | (fdt_addr & 0xffff)); + } + stl_p(p++, 0x34060000); /* li $6, 0 */ + stl_p(p++, 0x34070000); /* li $7, 0 */ + + /* Load kernel entry address & jump to it */ + /* lui $25, hi(kernel_entry) */ + stl_p(p++, 0x3c190000 | ((kernel_entry >> 16) & 0xffff)); + /* ori $25, lo(kernel_entry) */ + stl_p(p++, 0x37390000 | (kernel_entry & 0xffff)); + stl_p(p++, 0x03200009); /* jr $25 */ +} + +static const void *boston_fdt_filter(void *opaque, const void *fdt_orig, + const void *match_data, hwaddr *load_addr) +{ + BostonState *s = BOSTON(opaque); + MachineState *machine = s->mach; + const char *cmdline; + int err; + void *fdt; + size_t fdt_sz, ram_low_sz, ram_high_sz; + + fdt_sz = fdt_totalsize(fdt_orig) * 2; + fdt = g_malloc0(fdt_sz); + + err = fdt_open_into(fdt_orig, fdt, fdt_sz); + if (err) { + fprintf(stderr, "unable to open FDT\n"); + return NULL; + } + + cmdline = (machine->kernel_cmdline && machine->kernel_cmdline[0]) + ? 
machine->kernel_cmdline : " "; + err = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", cmdline); + if (err < 0) { + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + return NULL; + } + + ram_low_sz = MIN(256 * M_BYTE, machine->ram_size); + ram_high_sz = machine->ram_size - ram_low_sz; + qemu_fdt_setprop_sized_cells(fdt, "/memory@0", "reg", + 1, 0x00000000, 1, ram_low_sz, + 1, 0x90000000, 1, ram_high_sz); + + fdt = g_realloc(fdt, fdt_totalsize(fdt)); + qemu_fdt_dumpdtb(fdt, fdt_sz); + + s->fdt_base = *load_addr; + + return fdt; +} + +static const void *boston_kernel_filter(void *opaque, const void *kernel, + hwaddr *load_addr, hwaddr *entry_addr) +{ + BostonState *s = BOSTON(opaque); + + s->kernel_entry = *entry_addr; + + return kernel; +} + +static const struct fit_loader_match boston_matches[] = { + { "img,boston" }, + { NULL }, +}; + +static const struct fit_loader boston_fit_loader = { + .matches = boston_matches, + .addr_to_phys = cpu_mips_kseg0_to_phys, + .fdt_filter = boston_fdt_filter, + .kernel_filter = boston_kernel_filter, +}; + +static inline XilinxPCIEHost * +xilinx_pcie_init(MemoryRegion *sys_mem, uint32_t bus_nr, + hwaddr cfg_base, uint64_t cfg_size, + hwaddr mmio_base, uint64_t mmio_size, + qemu_irq irq, bool link_up) +{ + DeviceState *dev; + MemoryRegion *cfg, *mmio; + + dev = qdev_create(NULL, TYPE_XILINX_PCIE_HOST); + + qdev_prop_set_uint32(dev, "bus_nr", bus_nr); + qdev_prop_set_uint64(dev, "cfg_base", cfg_base); + qdev_prop_set_uint64(dev, "cfg_size", cfg_size); + qdev_prop_set_uint64(dev, "mmio_base", mmio_base); + qdev_prop_set_uint64(dev, "mmio_size", mmio_size); + qdev_prop_set_bit(dev, "link_up", link_up); + + qdev_init_nofail(dev); + + cfg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); + memory_region_add_subregion_overlap(sys_mem, cfg_base, cfg, 0); + + mmio = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); + memory_region_add_subregion_overlap(sys_mem, 0, mmio, 0); + + qdev_connect_gpio_out_named(dev, "interrupt_out", 0, irq); + + return XILINX_PCIE_HOST(dev); +} + +static void boston_mach_init(MachineState *machine) +{ + DeviceState *dev; + BostonState *s; + Error *err = NULL; + const char *cpu_model; + MemoryRegion *flash, *ddr, *ddr_low_alias, *lcd, *platreg; + MemoryRegion *sys_mem = get_system_memory(); + XilinxPCIEHost *pcie2; + PCIDevice *ahci; + DriveInfo *hd[6]; + Chardev *chr; + int fw_size, fit_err; + bool is_64b; + + if ((machine->ram_size % G_BYTE) || + (machine->ram_size > (2 * G_BYTE))) { + error_report("Memory size must be 1GB or 2GB"); + exit(1); + } + + cpu_model = machine->cpu_model ?: "I6400"; + + dev = qdev_create(NULL, TYPE_MIPS_BOSTON); + qdev_init_nofail(dev); + + s = BOSTON(dev); + s->mach = machine; + s->cps = g_new0(MIPSCPSState, 1); + + if (!cpu_supports_cps_smp(cpu_model)) { + error_report("Boston requires CPUs which support CPS"); + exit(1); + } + + is_64b = cpu_supports_isa(cpu_model, ISA_MIPS64); + + object_initialize(s->cps, sizeof(MIPSCPSState), TYPE_MIPS_CPS); + qdev_set_parent_bus(DEVICE(s->cps), sysbus_get_default()); + + object_property_set_str(OBJECT(s->cps), cpu_model, "cpu-model", &err); + object_property_set_int(OBJECT(s->cps), smp_cpus, "num-vp", &err); + object_property_set_bool(OBJECT(s->cps), true, "realized", &err); + + if (err != NULL) { + error_report("%s", error_get_pretty(err)); + exit(1); + } + + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1); + + flash = g_new(MemoryRegion, 1); + memory_region_init_rom_device(flash, NULL, &boston_flash_ops, s, + "boston.flash", 128 * M_BYTE, &err); + 
memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0); + + ddr = g_new(MemoryRegion, 1); + memory_region_allocate_system_memory(ddr, NULL, "boston.ddr", + machine->ram_size); + memory_region_add_subregion_overlap(sys_mem, 0x80000000, ddr, 0); + + ddr_low_alias = g_new(MemoryRegion, 1); + memory_region_init_alias(ddr_low_alias, NULL, "boston_low.ddr", + ddr, 0, MIN(machine->ram_size, (256 * M_BYTE))); + memory_region_add_subregion_overlap(sys_mem, 0, ddr_low_alias, 0); + + xilinx_pcie_init(sys_mem, 0, + 0x10000000, 32 * M_BYTE, + 0x40000000, 1 * G_BYTE, + get_cps_irq(s->cps, 2), false); + + xilinx_pcie_init(sys_mem, 1, + 0x12000000, 32 * M_BYTE, + 0x20000000, 512 * M_BYTE, + get_cps_irq(s->cps, 1), false); + + pcie2 = xilinx_pcie_init(sys_mem, 2, + 0x14000000, 32 * M_BYTE, + 0x16000000, 1 * M_BYTE, + get_cps_irq(s->cps, 0), true); + + platreg = g_new(MemoryRegion, 1); + memory_region_init_io(platreg, NULL, &boston_platreg_ops, s, + "boston-platregs", 0x1000); + memory_region_add_subregion_overlap(sys_mem, 0x17ffd000, platreg, 0); + + if (!serial_hds[0]) { + serial_hds[0] = qemu_chr_new("serial0", "null"); + } + + s->uart = serial_mm_init(sys_mem, 0x17ffe000, 2, + get_cps_irq(s->cps, 3), 10000000, + serial_hds[0], DEVICE_NATIVE_ENDIAN); + + lcd = g_new(MemoryRegion, 1); + memory_region_init_io(lcd, NULL, &boston_lcd_ops, s, "boston-lcd", 0x8); + memory_region_add_subregion_overlap(sys_mem, 0x17fff000, lcd, 0); + + chr = qemu_chr_new("lcd", "vc:320x240"); + qemu_chr_fe_init(&s->lcd_display, chr, NULL); + qemu_chr_fe_set_handlers(&s->lcd_display, NULL, NULL, + boston_lcd_event, s, NULL, true); + + ahci = pci_create_simple_multifunction(&PCI_BRIDGE(&pcie2->root)->sec_bus, + PCI_DEVFN(0, 0), + true, TYPE_ICH9_AHCI); + g_assert(ARRAY_SIZE(hd) == ICH_AHCI(ahci)->ahci.ports); + ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); + ahci_ide_create_devs(ahci, hd); + + if (machine->firmware) { + fw_size = load_image_targphys(machine->firmware, + 0x1fc00000, 4 * M_BYTE); + if (fw_size == -1) { + error_printf("unable to load firmware image '%s'\n", + machine->firmware); + exit(1); + } + } else if (machine->kernel_filename) { + fit_err = load_fit(&boston_fit_loader, machine->kernel_filename, s); + if (fit_err) { + error_printf("unable to load FIT image\n"); + exit(1); + } + + gen_firmware(memory_region_get_ram_ptr(flash) + 0x7c00000, + s->kernel_entry, s->fdt_base, is_64b); + } else if (!qtest_enabled()) { + error_printf("Please provide either a -kernel or -bios argument\n"); + exit(1); + } +} + +static void boston_mach_class_init(MachineClass *mc) +{ + mc->desc = "MIPS Boston"; + mc->init = boston_mach_init; + mc->block_default_type = IF_IDE; + mc->default_ram_size = 1 * G_BYTE; + mc->max_cpus = 16; +} + +DEFINE_MACHINE("boston", boston_mach_class_init) diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 898e4ccfb1..c8b489390f 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -26,7 +26,7 @@ obj-$(CONFIG_IVSHMEM) += ivshmem.o obj-$(CONFIG_REALVIEW) += arm_sysctl.o obj-$(CONFIG_NSERIES) += cbus.o obj-$(CONFIG_ECCMEMCTL) += eccmemctl.o -obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o +obj-$(CONFIG_EXYNOS4) += exynos4210_pmu.o exynos4210_clk.o obj-$(CONFIG_IMX) += imx_ccm.o obj-$(CONFIG_IMX) += imx31_ccm.o obj-$(CONFIG_IMX) += imx25_ccm.o @@ -42,6 +42,7 @@ obj-$(CONFIG_OMAP) += omap_sdrc.o obj-$(CONFIG_OMAP) += omap_tap.o obj-$(CONFIG_RASPI) += bcm2835_mbox.o obj-$(CONFIG_RASPI) += bcm2835_property.o +obj-$(CONFIG_RASPI) += bcm2835_rng.o obj-$(CONFIG_SLAVIO) += slavio_misc.o 
obj-$(CONFIG_ZYNQ) += zynq_slcr.o obj-$(CONFIG_ZYNQ) += zynq-xadc.o diff --git a/hw/misc/bcm2835_rng.c b/hw/misc/bcm2835_rng.c new file mode 100644 index 0000000000..4d62143b24 --- /dev/null +++ b/hw/misc/bcm2835_rng.c @@ -0,0 +1,149 @@ +/* + * BCM2835 Random Number Generator emulation + * + * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "crypto/random.h" +#include "hw/misc/bcm2835_rng.h" + +static uint32_t get_random_bytes(void) +{ + uint32_t res; + Error *err = NULL; + + if (qcrypto_random_bytes((uint8_t *)&res, sizeof(res), &err) < 0) { + /* On failure we don't want to return the guest a non-random + * value in case they're really using it for cryptographic + * purposes, so the best we can do is die here. + * This shouldn't happen unless something's broken. + * In theory we could implement this device's full FIFO + * and interrupt semantics and then just stop filling the + * FIFO. That's a lot of work, though, so we assume any + * errors are systematic problems and trust that if we didn't + * fail as the guest inited then we won't fail later on + * mid-run. + */ + error_report_err(err); + exit(1); + } + return res; +} + +static uint64_t bcm2835_rng_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835RngState *s = (BCM2835RngState *)opaque; + uint32_t res = 0; + + assert(size == 4); + + switch (offset) { + case 0x0: /* rng_ctrl */ + res = s->rng_ctrl; + break; + case 0x4: /* rng_status */ + res = s->rng_status | (1 << 24); + break; + case 0x8: /* rng_data */ + res = get_random_bytes(); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_rng_read: Bad offset %x\n", + (int)offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_rng_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835RngState *s = (BCM2835RngState *)opaque; + + assert(size == 4); + + switch (offset) { + case 0x0: /* rng_ctrl */ + s->rng_ctrl = value; + break; + case 0x4: /* rng_status */ + /* we shouldn't let the guest write to bits [31..20] */ + s->rng_status &= ~0xFFFFF; /* clear 20 lower bits */ + s->rng_status |= value & 0xFFFFF; /* set them to new value */ + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, + "bcm2835_rng_write: Bad offset %x\n", + (int)offset); + break; + } +} + +static const MemoryRegionOps bcm2835_rng_ops = { + .read = bcm2835_rng_read, + .write = bcm2835_rng_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_rng = { + .name = TYPE_BCM2835_RNG, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(rng_ctrl, BCM2835RngState), + VMSTATE_UINT32(rng_status, BCM2835RngState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_rng_init(Object *obj) +{ + BCM2835RngState *s = BCM2835_RNG(obj); + + memory_region_init_io(&s->iomem, obj, &bcm2835_rng_ops, s, + TYPE_BCM2835_RNG, 0x10); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); +} + +static void bcm2835_rng_reset(DeviceState *dev) +{ + BCM2835RngState *s = BCM2835_RNG(dev); + + s->rng_ctrl = 0; + s->rng_status = 0; +} + +static void bcm2835_rng_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_rng_reset; + dc->vmsd = &vmstate_bcm2835_rng; +} + +static TypeInfo bcm2835_rng_info = { + .name = 
TYPE_BCM2835_RNG, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835RngState), + .class_init = bcm2835_rng_class_init, + .instance_init = bcm2835_rng_init, +}; + +static void bcm2835_rng_register_types(void) +{ + type_register_static(&bcm2835_rng_info); +} + +type_init(bcm2835_rng_register_types) diff --git a/hw/misc/exynos4210_clk.c b/hw/misc/exynos4210_clk.c new file mode 100644 index 0000000000..81862c0ada --- /dev/null +++ b/hw/misc/exynos4210_clk.c @@ -0,0 +1,164 @@ +/* + * Exynos4210 Clock Controller Emulation + * + * Copyright (c) 2017 Krzysztof Kozlowski <krzk@kernel.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "qemu/log.h" + +#define TYPE_EXYNOS4210_CLK "exynos4210.clk" +#define EXYNOS4210_CLK(obj) \ + OBJECT_CHECK(Exynos4210ClkState, (obj), TYPE_EXYNOS4210_CLK) + +#define CLK_PLL_LOCKED BIT(29) + +#define EXYNOS4210_CLK_REGS_MEM_SIZE 0x15104 + +typedef struct Exynos4210Reg { + const char *name; /* for debug only */ + uint32_t offset; + uint32_t reset_value; +} Exynos4210Reg; + +/* Clock controller register base: 0x10030000 */ +static const Exynos4210Reg exynos4210_clk_regs[] = { + {"EPLL_LOCK", 0xc010, 0x00000fff}, + {"VPLL_LOCK", 0xc020, 0x00000fff}, + {"EPLL_CON0", 0xc110, 0x00300301 | CLK_PLL_LOCKED}, + {"EPLL_CON1", 0xc114, 0x00000000}, + {"VPLL_CON0", 0xc120, 0x00240201 | CLK_PLL_LOCKED}, + {"VPLL_CON1", 0xc124, 0x66010464}, + {"APLL_LOCK", 0x14000, 0x00000fff}, + {"MPLL_LOCK", 0x14004, 0x00000fff}, + {"APLL_CON0", 0x14100, 0x00c80601 | CLK_PLL_LOCKED}, + {"APLL_CON1", 0x14104, 0x0000001c}, + {"MPLL_CON0", 0x14108, 0x00c80601 | CLK_PLL_LOCKED}, + {"MPLL_CON1", 0x1410c, 0x0000001c}, +}; + +#define EXYNOS4210_REGS_NUM ARRAY_SIZE(exynos4210_clk_regs) + +typedef struct Exynos4210ClkState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + uint32_t reg[EXYNOS4210_REGS_NUM]; +} Exynos4210ClkState; + +static uint64_t exynos4210_clk_read(void *opaque, hwaddr offset, + unsigned size) +{ + const Exynos4210ClkState *s = (Exynos4210ClkState *)opaque; + const Exynos4210Reg *regs = exynos4210_clk_regs; + unsigned int i; + + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + if (regs->offset == offset) { + return s->reg[i]; + } + regs++; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad read offset 0x%04x\n", + __func__, (uint32_t)offset); + return 0; +} + +static void exynos4210_clk_write(void *opaque, hwaddr offset, + uint64_t val, unsigned size) +{ + Exynos4210ClkState *s = (Exynos4210ClkState *)opaque; + const Exynos4210Reg *regs = exynos4210_clk_regs; + unsigned int i; + + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + if (regs->offset == offset) { + s->reg[i] = val; + return; + } + regs++; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: bad write offset 0x%04x\n", + __func__, (uint32_t)offset); +} + +static const MemoryRegionOps exynos4210_clk_ops = { + .read = exynos4210_clk_read, + .write = 
exynos4210_clk_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false + } +}; + +static void exynos4210_clk_reset(DeviceState *dev) +{ + Exynos4210ClkState *s = EXYNOS4210_CLK(dev); + unsigned int i; + + /* Set default values for registers */ + for (i = 0; i < EXYNOS4210_REGS_NUM; i++) { + s->reg[i] = exynos4210_clk_regs[i].reset_value; + } +} + +static void exynos4210_clk_init(Object *obj) +{ + Exynos4210ClkState *s = EXYNOS4210_CLK(obj); + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + + /* memory mapping */ + memory_region_init_io(&s->iomem, obj, &exynos4210_clk_ops, s, + TYPE_EXYNOS4210_CLK, EXYNOS4210_CLK_REGS_MEM_SIZE); + sysbus_init_mmio(dev, &s->iomem); +} + +static const VMStateDescription exynos4210_clk_vmstate = { + .name = TYPE_EXYNOS4210_CLK, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(reg, Exynos4210ClkState, EXYNOS4210_REGS_NUM), + VMSTATE_END_OF_LIST() + } +}; + +static void exynos4210_clk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = exynos4210_clk_reset; + dc->vmsd = &exynos4210_clk_vmstate; +} + +static const TypeInfo exynos4210_clk_info = { + .name = TYPE_EXYNOS4210_CLK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(Exynos4210ClkState), + .instance_init = exynos4210_clk_init, + .class_init = exynos4210_clk_class_init, +}; + +static void exynos4210_clk_register(void) +{ + qemu_log_mask(LOG_GUEST_ERROR, "Clock init\n"); + type_register_static(&exynos4210_clk_info); +} + +type_init(exynos4210_clk_register) diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index 55b817b8d7..edbb756c36 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -14,6 +14,7 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "arm-powerctl.h" +#include "qom/cpu.h" #ifndef DEBUG_IMX6_SRC #define DEBUG_IMX6_SRC 0 @@ -113,6 +114,45 @@ static uint64_t imx6_src_read(void *opaque, hwaddr offset, unsigned size) return value; } + +/* The reset is asynchronous so we need to defer clearing the reset + * bit until the work is completed. 
+ */ + +struct SRCSCRResetInfo { + IMX6SRCState *s; + int reset_bit; +}; + +static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) +{ + struct SRCSCRResetInfo *ri = data.host_ptr; + IMX6SRCState *s = ri->s; + + assert(qemu_mutex_iothread_locked()); + + s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0); + DPRINTF("reg[%s] <= 0x%" PRIx32 "\n", + imx6_src_reg_name(SRC_SCR), s->regs[SRC_SCR]); + + g_free(ri); +} + +static void imx6_defer_clear_reset_bit(int cpuid, + IMX6SRCState *s, + unsigned long reset_shift) +{ + struct SRCSCRResetInfo *ri; + + ri = g_malloc(sizeof(struct SRCSCRResetInfo)); + ri->s = s; + ri->reset_bit = reset_shift; + + async_run_on_cpu(arm_get_cpu_by_id(cpuid), imx6_clear_reset_bit, + RUN_ON_CPU_HOST_PTR(ri)); +} + + static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -153,7 +193,7 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value, arm_set_cpu_off(3); } /* We clear the reset bits as the processor changed state */ - clear_bit(CORE3_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT); clear_bit(CORE3_RST_SHIFT, &change_mask); } if (EXTRACT(change_mask, CORE2_ENABLE)) { @@ -162,11 +202,11 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value, arm_set_cpu_on(2, s->regs[SRC_GPR5], s->regs[SRC_GPR6], 3, false); } else { - /* CORE 3 is shut down */ + /* CORE 2 is shut down */ arm_set_cpu_off(2); } /* We clear the reset bits as the processor changed state */ - clear_bit(CORE2_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT); clear_bit(CORE2_RST_SHIFT, &change_mask); } if (EXTRACT(change_mask, CORE1_ENABLE)) { @@ -175,28 +215,28 @@ static void imx6_src_write(void *opaque, hwaddr offset, uint64_t value, arm_set_cpu_on(1, s->regs[SRC_GPR3], s->regs[SRC_GPR4], 3, false); } else { - /* CORE 3 is shut down */ + /* CORE 1 is shut down */ arm_set_cpu_off(1); } /* We clear the reset bits as the processor changed state */ - clear_bit(CORE1_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT); clear_bit(CORE1_RST_SHIFT, &change_mask); } if (EXTRACT(change_mask, CORE0_RST)) { arm_reset_cpu(0); - clear_bit(CORE0_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(0, s, CORE0_RST_SHIFT); } if (EXTRACT(change_mask, CORE1_RST)) { arm_reset_cpu(1); - clear_bit(CORE1_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(1, s, CORE1_RST_SHIFT); } if (EXTRACT(change_mask, CORE2_RST)) { arm_reset_cpu(2); - clear_bit(CORE2_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(2, s, CORE2_RST_SHIFT); } if (EXTRACT(change_mask, CORE3_RST)) { arm_reset_cpu(3); - clear_bit(CORE3_RST_SHIFT, &current_value); + imx6_defer_clear_reset_bit(3, s, CORE3_RST_SHIFT); } if (EXTRACT(change_mask, SW_IPU2_RST)) { /* We pretend the IPU2 is reset */ diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index bf57e635d6..82ce8378bf 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -1267,10 +1267,11 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp) if (s->sizearg == NULL) { s->legacy_size = 4 << 20; /* 4 MB default */ } else { - char *end; - int64_t size = qemu_strtosz(s->sizearg, &end); - if (size < 0 || (size_t)size != size || *end != '\0' - || !is_power_of_2(size)) { + int ret; + uint64_t size; + + ret = qemu_strtosz_MiB(s->sizearg, NULL, &size); + if (ret < 0 || (size_t)size != size || !is_power_of_2(size)) { error_setg(errp, "Invalid size %s", s->sizearg); return; } diff --git a/hw/misc/mips_cmgcr.c b/hw/misc/mips_cmgcr.c index 
b3ba16694e..a1edb53f95 100644 --- a/hw/misc/mips_cmgcr.c +++ b/hw/misc/mips_cmgcr.c @@ -29,6 +29,20 @@ static inline bool is_gic_connected(MIPSGCRState *s) return s->gic_mr != NULL; } +static inline void update_gcr_base(MIPSGCRState *gcr, uint64_t val) +{ + CPUState *cpu; + MIPSCPU *mips_cpu; + + gcr->gcr_base = val & GCR_BASE_GCRBASE_MSK; + memory_region_set_address(&gcr->iomem, gcr->gcr_base); + + CPU_FOREACH(cpu) { + mips_cpu = MIPS_CPU(cpu); + mips_cpu->env.CP0_CMGCRBase = gcr->gcr_base >> 4; + } +} + static inline void update_cpc_base(MIPSGCRState *gcr, uint64_t val) { if (is_cpc_connected(gcr)) { @@ -117,6 +131,9 @@ static void gcr_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) MIPSGCRVPState *other_vps = &gcr->vps[current_vps->other]; switch (addr) { + case GCR_BASE_OFS: + update_gcr_base(gcr, data); + break; case GCR_GIC_BASE_OFS: update_gic_base(gcr, data); break; diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index e99d4544a2..d4de8ad9f1 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -508,7 +508,7 @@ static void gem_update_int_status(CadenceGEMState *s) if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) { /* No priority queues, just trigger the interrupt */ - DB_PRINT("asserting int.\n", i); + DB_PRINT("asserting int.\n"); qemu_set_irq(s->irq[0], 1); return; } diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 058908d8d7..d239e4bd7d 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -385,18 +385,24 @@ static int spapr_vlan_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) int ret; /* Some old phyp versions give the mac address in an 8-byte - * property. The kernel driver has an insane workaround for this; + * property. The kernel driver (before 3.10) has an insane workaround; * rather than doing the obvious thing and checking the property * length, it checks whether the first byte has 0b10 in the low * bits. If a correct 6-byte property has a different first byte * the kernel will get the wrong mac address, overrunning its * buffer in the process (read only, thank goodness). * - * Here we workaround the kernel workaround by always supplying an - * 8-byte property, with the mac address in the last six bytes */ - memcpy(&padded_mac[2], &vdev->nicconf.macaddr, ETH_ALEN); - ret = fdt_setprop(fdt, node_off, "local-mac-address", - padded_mac, sizeof(padded_mac)); + * Here we return a 6-byte address unless that would break a pre-3.10 + * driver. In that case we return a padded 8-byte address to allow the old + * workaround to succeed. 
*/ + if ((vdev->nicconf.macaddr.a[0] & 0x3) == 0x2) { + ret = fdt_setprop(fdt, node_off, "local-mac-address", + &vdev->nicconf.macaddr, ETH_ALEN); + } else { + memcpy(&padded_mac[2], &vdev->nicconf.macaddr, ETH_ALEN); + ret = fdt_setprop(fdt, node_off, "local-mac-address", + padded_mac, sizeof(padded_mac)); + } if (ret < 0) { return ret; } diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 65ba188555..aa5d2c1f5f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -141,9 +141,17 @@ static void rtas_nvram_store(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) { sPAPRNVRAM *nvram = VIO_SPAPR_NVRAM(dev); + int ret; if (nvram->blk) { nvram->size = blk_getlength(nvram->blk); + + ret = blk_set_perm(nvram->blk, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } } else { nvram->size = DEFAULT_NVRAM_SIZE; } diff --git a/hw/pci-host/Makefile.objs b/hw/pci-host/Makefile.objs index 45f1f0ebab..9c7909cf44 100644 --- a/hw/pci-host/Makefile.objs +++ b/hw/pci-host/Makefile.objs @@ -16,3 +16,4 @@ common-obj-$(CONFIG_FULONG) += bonito.o common-obj-$(CONFIG_PCI_PIIX) += piix.o common-obj-$(CONFIG_PCI_Q35) += q35.o common-obj-$(CONFIG_PCI_GENERIC) += gpex.o +common-obj-$(CONFIG_PCI_XILINX) += xilinx-pcie.o diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index 5580293f93..260a119a9e 100644 --- a/hw/pci-host/prep.c +++ b/hw/pci-host/prep.c @@ -309,7 +309,6 @@ static void raven_realize(PCIDevice *d, Error **errp) memory_region_set_readonly(&s->bios, true); memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), &s->bios); - vmstate_register_ram_global(&s->bios); if (s->bios_name) { filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); if (filename) { @@ -328,12 +327,15 @@ static void raven_realize(PCIDevice *d, Error **errp) } } } + g_free(filename); if (bios_size < 0 || bios_size > BIOS_SIZE) { - /* FIXME should error_setg() */ - hw_error("qemu: could not load bios image '%s'\n", s->bios_name); + memory_region_del_subregion(get_system_memory(), &s->bios); + error_setg(errp, "Could not load bios image '%s'", s->bios_name); + return; } - g_free(filename); } + + vmstate_register_ram_global(&s->bios); } static const VMStateDescription vmstate_raven = { @@ -361,7 +363,6 @@ static void raven_class_init(ObjectClass *klass, void *data) /* * Reason: PCI-facing part of the host bridge, not usable without * the host-facing part, which can't be device_add'ed, yet. - * Reason: realize() method uses hw_error(). */ dc->cannot_instantiate_with_device_add_yet = true; } diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c new file mode 100644 index 0000000000..8b71e2d950 --- /dev/null +++ b/hw/pci-host/xilinx-pcie.c @@ -0,0 +1,328 @@ +/* + * Xilinx PCIe host controller emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci-host/xilinx-pcie.h" + +enum root_cfg_reg { + /* Interrupt Decode Register */ + ROOTCFG_INTDEC = 0x138, + + /* Interrupt Mask Register */ + ROOTCFG_INTMASK = 0x13c, + /* INTx Interrupt Received */ +#define ROOTCFG_INTMASK_INTX (1 << 16) + /* MSI Interrupt Received */ +#define ROOTCFG_INTMASK_MSI (1 << 17) + + /* PHY Status/Control Register */ + ROOTCFG_PSCR = 0x144, + /* Link Up */ +#define ROOTCFG_PSCR_LINK_UP (1 << 11) + + /* Root Port Status/Control Register */ + ROOTCFG_RPSCR = 0x148, + /* Bridge Enable */ +#define ROOTCFG_RPSCR_BRIDGEEN (1 << 0) + /* Interrupt FIFO Not Empty */ +#define ROOTCFG_RPSCR_INTNEMPTY (1 << 18) + /* Interrupt FIFO Overflow */ +#define ROOTCFG_RPSCR_INTOVF (1 << 19) + + /* Root Port Interrupt FIFO Read Register 1 */ + ROOTCFG_RPIFR1 = 0x158, +#define ROOTCFG_RPIFR1_INT_LANE_SHIFT 27 +#define ROOTCFG_RPIFR1_INT_ASSERT_SHIFT 29 +#define ROOTCFG_RPIFR1_INT_VALID_SHIFT 31 + /* Root Port Interrupt FIFO Read Register 2 */ + ROOTCFG_RPIFR2 = 0x15c, +}; + +static void xilinx_pcie_update_intr(XilinxPCIEHost *s, + uint32_t set, uint32_t clear) +{ + int level; + + s->intr |= set; + s->intr &= ~clear; + + if (s->intr_fifo_r != s->intr_fifo_w) { + s->intr |= ROOTCFG_INTMASK_INTX; + } + + level = !!(s->intr & s->intr_mask); + qemu_set_irq(s->irq, level); +} + +static void xilinx_pcie_queue_intr(XilinxPCIEHost *s, + uint32_t fifo_reg1, uint32_t fifo_reg2) +{ + XilinxPCIEInt *intr; + unsigned int new_w; + + new_w = (s->intr_fifo_w + 1) % ARRAY_SIZE(s->intr_fifo); + if (new_w == s->intr_fifo_r) { + s->rpscr |= ROOTCFG_RPSCR_INTOVF; + return; + } + + intr = &s->intr_fifo[s->intr_fifo_w]; + s->intr_fifo_w = new_w; + + intr->fifo_reg1 = fifo_reg1; + intr->fifo_reg2 = fifo_reg2; + + xilinx_pcie_update_intr(s, ROOTCFG_INTMASK_INTX, 0); +} + +static void xilinx_pcie_set_irq(void *opaque, int irq_num, int level) +{ + XilinxPCIEHost *s = XILINX_PCIE_HOST(opaque); + + xilinx_pcie_queue_intr(s, + (irq_num << ROOTCFG_RPIFR1_INT_LANE_SHIFT) | + (level << ROOTCFG_RPIFR1_INT_ASSERT_SHIFT) | + (1 << ROOTCFG_RPIFR1_INT_VALID_SHIFT), + 0); +} + +static void xilinx_pcie_host_realize(DeviceState *dev, Error **errp) +{ + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + XilinxPCIEHost *s = XILINX_PCIE_HOST(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PCIExpressHost *pex = PCIE_HOST_BRIDGE(dev); + + snprintf(s->name, sizeof(s->name), "pcie%u", s->bus_nr); + + /* PCI configuration space */ + pcie_host_mmcfg_init(pex, s->cfg_size); + + /* MMIO region */ + memory_region_init(&s->mmio, OBJECT(s), "mmio", UINT64_MAX); + memory_region_set_enabled(&s->mmio, false); + + /* dummy I/O region */ + memory_region_init_ram(&s->io, OBJECT(s), "io", 16, NULL); + memory_region_set_enabled(&s->io, false); + + /* interrupt out */ + qdev_init_gpio_out_named(dev, &s->irq, "interrupt_out", 1); + + sysbus_init_mmio(sbd, &pex->mmio); + sysbus_init_mmio(sbd, &s->mmio); + + pci->bus = pci_register_bus(dev, s->name, xilinx_pcie_set_irq, + pci_swizzle_map_irq_fn, s, &s->mmio, + &s->io, 0, 4, TYPE_PCIE_BUS); + + qdev_set_parent_bus(DEVICE(&s->root), BUS(pci->bus)); + qdev_init_nofail(DEVICE(&s->root)); +} + +static const char *xilinx_pcie_host_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + return "0000:00"; +} + +static void xilinx_pcie_host_init(Object *obj) +{ + 
XilinxPCIEHost *s = XILINX_PCIE_HOST(obj); + XilinxPCIERoot *root = &s->root; + + object_initialize(root, sizeof(*root), TYPE_XILINX_PCIE_ROOT); + object_property_add_child(obj, "root", OBJECT(root), NULL); + qdev_prop_set_uint32(DEVICE(root), "addr", PCI_DEVFN(0, 0)); + qdev_prop_set_bit(DEVICE(root), "multifunction", false); +} + +static Property xilinx_pcie_host_props[] = { + DEFINE_PROP_UINT32("bus_nr", XilinxPCIEHost, bus_nr, 0), + DEFINE_PROP_SIZE("cfg_base", XilinxPCIEHost, cfg_base, 0), + DEFINE_PROP_SIZE("cfg_size", XilinxPCIEHost, cfg_size, 32 << 20), + DEFINE_PROP_SIZE("mmio_base", XilinxPCIEHost, mmio_base, 0), + DEFINE_PROP_SIZE("mmio_size", XilinxPCIEHost, mmio_size, 1 << 20), + DEFINE_PROP_BOOL("link_up", XilinxPCIEHost, link_up, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xilinx_pcie_host_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + + hc->root_bus_path = xilinx_pcie_host_root_bus_path; + dc->realize = xilinx_pcie_host_realize; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->fw_name = "pci"; + dc->props = xilinx_pcie_host_props; +} + +static const TypeInfo xilinx_pcie_host_info = { + .name = TYPE_XILINX_PCIE_HOST, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_size = sizeof(XilinxPCIEHost), + .instance_init = xilinx_pcie_host_init, + .class_init = xilinx_pcie_host_class_init, +}; + +static uint32_t xilinx_pcie_root_config_read(PCIDevice *d, + uint32_t address, int len) +{ + XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent); + uint32_t val; + + switch (address) { + case ROOTCFG_INTDEC: + val = s->intr; + break; + case ROOTCFG_INTMASK: + val = s->intr_mask; + break; + case ROOTCFG_PSCR: + val = s->link_up ? ROOTCFG_PSCR_LINK_UP : 0; + break; + case ROOTCFG_RPSCR: + if (s->intr_fifo_r != s->intr_fifo_w) { + s->rpscr &= ~ROOTCFG_RPSCR_INTNEMPTY; + } else { + s->rpscr |= ROOTCFG_RPSCR_INTNEMPTY; + } + val = s->rpscr; + break; + case ROOTCFG_RPIFR1: + if (s->intr_fifo_w == s->intr_fifo_r) { + /* FIFO empty */ + val = 0; + } else { + val = s->intr_fifo[s->intr_fifo_r].fifo_reg1; + } + break; + case ROOTCFG_RPIFR2: + if (s->intr_fifo_w == s->intr_fifo_r) { + /* FIFO empty */ + val = 0; + } else { + val = s->intr_fifo[s->intr_fifo_r].fifo_reg2; + } + break; + default: + val = pci_default_read_config(d, address, len); + break; + } + return val; +} + +static void xilinx_pcie_root_config_write(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ + XilinxPCIEHost *s = XILINX_PCIE_HOST(OBJECT(d)->parent); + switch (address) { + case ROOTCFG_INTDEC: + xilinx_pcie_update_intr(s, 0, val); + break; + case ROOTCFG_INTMASK: + s->intr_mask = val; + xilinx_pcie_update_intr(s, 0, 0); + break; + case ROOTCFG_RPSCR: + s->rpscr &= ~ROOTCFG_RPSCR_BRIDGEEN; + s->rpscr |= val & ROOTCFG_RPSCR_BRIDGEEN; + memory_region_set_enabled(&s->mmio, val & ROOTCFG_RPSCR_BRIDGEEN); + + if (val & ROOTCFG_INTMASK_INTX) { + s->rpscr &= ~ROOTCFG_INTMASK_INTX; + } + break; + case ROOTCFG_RPIFR1: + case ROOTCFG_RPIFR2: + if (s->intr_fifo_w == s->intr_fifo_r) { + /* FIFO empty */ + return; + } else { + s->intr_fifo_r = (s->intr_fifo_r + 1) % ARRAY_SIZE(s->intr_fifo); + } + break; + default: + pci_default_write_config(d, address, val, len); + break; + } +} + +static int xilinx_pcie_root_init(PCIDevice *dev) +{ + BusState *bus = qdev_get_parent_bus(DEVICE(dev)); + XilinxPCIEHost *s = XILINX_PCIE_HOST(bus->parent); + + pci_set_word(dev->config + PCI_COMMAND, + PCI_COMMAND_MEMORY | 
PCI_COMMAND_MASTER); + pci_set_word(dev->config + PCI_MEMORY_BASE, s->mmio_base >> 16); + pci_set_word(dev->config + PCI_MEMORY_LIMIT, + ((s->mmio_base + s->mmio_size - 1) >> 16) & 0xfff0); + + pci_bridge_initfn(dev, TYPE_PCI_BUS); + + if (pcie_endpoint_cap_v1_init(dev, 0x80) < 0) { + hw_error("Failed to initialize PCIe capability"); + } + + return 0; +} + +static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->desc = "Xilinx AXI-PCIe Host Bridge"; + k->vendor_id = PCI_VENDOR_ID_XILINX; + k->device_id = 0x7021; + k->revision = 0; + k->class_id = PCI_CLASS_BRIDGE_HOST; + k->is_express = true; + k->is_bridge = true; + k->init = xilinx_pcie_root_init; + k->exit = pci_bridge_exitfn; + dc->reset = pci_bridge_reset; + k->config_read = xilinx_pcie_root_config_read; + k->config_write = xilinx_pcie_root_config_write; + /* + * PCI-facing part of the host bridge, not usable without the + * host-facing part, which can't be device_add'ed, yet. + */ + dc->cannot_instantiate_with_device_add_yet = true; +} + +static const TypeInfo xilinx_pcie_root_info = { + .name = TYPE_XILINX_PCIE_ROOT, + .parent = TYPE_PCI_BRIDGE, + .instance_size = sizeof(XilinxPCIERoot), + .class_init = xilinx_pcie_root_class_init, +}; + +static void xilinx_pcie_register(void) +{ + type_register_static(&xilinx_pcie_root_info); + type_register_static(&xilinx_pcie_host_info); +} + +type_init(xilinx_pcie_register) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index a563555e7d..273f1e4602 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1530,6 +1530,34 @@ static const pci_class_desc pci_class_descriptions[] = { 0, NULL} }; +static void pci_for_each_device_under_bus_reverse(PCIBus *bus, + void (*fn)(PCIBus *b, + PCIDevice *d, + void *opaque), + void *opaque) +{ + PCIDevice *d; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + d = bus->devices[ARRAY_SIZE(bus->devices) - 1 - devfn]; + if (d) { + fn(bus, d, opaque); + } + } +} + +void pci_for_each_device_reverse(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), + void *opaque) +{ + bus = pci_find_bus_nr(bus, bus_num); + + if (bus) { + pci_for_each_device_under_bus_reverse(bus, fn, opaque); + } +} + static void pci_for_each_device_under_bus(PCIBus *bus, void (*fn)(PCIBus *b, PCIDevice *d, void *opaque), diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index daf1f65427..a8c18203d6 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -1025,8 +1025,8 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict) return; } - assert(qobject_type(data) == QTYPE_QDICT); qdict = qobject_to_qdict(data); + assert(qdict); devfn = (int)qdict_get_int(qdict, "devfn"); monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n", diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 716aea6852..68aaedc06d 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -72,6 +72,7 @@ #include "exec/address-spaces.h" #include "hw/sysbus.h" #include "qemu/cutils.h" +#include "trace.h" #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 @@ -79,21 +80,11 @@ #define CLOCKFREQ (266UL * 1000UL * 1000UL) #define BUSFREQ (100UL * 1000UL * 1000UL) -/* debug UniNorth */ -//#define DEBUG_UNIN - -#ifdef DEBUG_UNIN -#define UNIN_DPRINTF(fmt, ...) \ - do { printf("UNIN: " fmt , ## __VA_ARGS__); } while (0) -#else -#define UNIN_DPRINTF(fmt, ...) 
-#endif - /* UniN device */ static void unin_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { - UNIN_DPRINTF("write addr " TARGET_FMT_plx " val %"PRIx64"\n", addr, value); + trace_mac99_uninorth_write(addr, value); if (addr == 0x0) { *(int*)opaque = value; } @@ -109,7 +100,7 @@ static uint64_t unin_read(void *opaque, hwaddr addr, unsigned size) value = *(int*)opaque; } - UNIN_DPRINTF("readl addr " TARGET_FMT_plx " val %x\n", addr, value); + trace_mac99_uninorth_read(addr, value); return value; } diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c index 4fab5c0ae7..09f0d22def 100644 --- a/hw/ppc/pnv.c +++ b/hw/ppc/pnv.c @@ -381,7 +381,7 @@ static void ppc_powernv_init(MachineState *machine) fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); if (fw_size < 0) { - error_report("qemu: could not load OPAL '%s'", fw_filename); + error_report("Could not load OPAL '%s'", fw_filename); exit(1); } g_free(fw_filename); @@ -393,7 +393,7 @@ static void ppc_powernv_init(MachineState *machine) kernel_size = load_image_targphys(machine->kernel_filename, KERNEL_LOAD_ADDR, 0x2000000); if (kernel_size < 0) { - error_report("qemu: could not load kernel'%s'", + error_report("Could not load kernel '%s'", machine->kernel_filename); exit(1); } @@ -405,7 +405,7 @@ static void ppc_powernv_init(MachineState *machine) pnv->initrd_size = load_image_targphys(machine->initrd_filename, pnv->initrd_base, 0x10000000); /* 128MB max */ if (pnv->initrd_size < 0) { - error_report("qemu: could not load initial ram disk '%s'", + error_report("Could not load initial ram disk '%s'", machine->initrd_filename); exit(1); } diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index d171e60b5c..5f93083d4a 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -62,7 +62,16 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; - unsigned int old_pending = env->pending_interrupts; + unsigned int old_pending; + bool locked = false; + + /* We may already have the BQL if coming from the reset path */ + if (!qemu_mutex_iothread_locked()) { + locked = true; + qemu_mutex_lock_iothread(); + } + + old_pending = env->pending_interrupts; if (level) { env->pending_interrupts |= 1 << n_IRQ; @@ -80,9 +89,14 @@ void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) #endif } + LOG_IRQ("%s: %p n_IRQ %d level %d => pending %08" PRIx32 "req %08x\n", __func__, env, n_IRQ, level, env->pending_interrupts, CPU(cpu)->interrupt_request); + + if (locked) { + qemu_mutex_unlock_iothread(); + } } /* PowerPC 6xx / 7xx internal IRQ controller */ diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c index d6d3fc2c4a..d5df94aa6e 100644 --- a/hw/ppc/ppc405_uc.c +++ b/hw/ppc/ppc405_uc.c @@ -1881,7 +1881,7 @@ static void ppc405cr_clk_setup (ppc405cr_cpc_t *cpc) D1 = (((cpc->pllmr >> 20) - 1) & 0xF) + 1; /* FBDV */ D2 = 8 - ((cpc->pllmr >> 16) & 0x7); /* FWDVA */ M = D0 * D1 * D2; - VCO_out = cpc->sysclk * M; + VCO_out = (uint64_t)cpc->sysclk * M; if (VCO_out < 400000000 || VCO_out > 800000000) { /* PLL cannot lock */ cpc->pllmr &= ~0x80000000; @@ -1892,7 +1892,7 @@ static void ppc405cr_clk_setup (ppc405cr_cpc_t *cpc) /* Bypass PLL */ bypass_pll: M = D0; - PLL_out = cpc->sysclk * M; + PLL_out = (uint64_t)cpc->sysclk * M; } CPU_clk = PLL_out; if (cpc->cr1 & 0x00800000) @@ -2242,7 +2242,7 @@ static void ppc405ep_compute_clocks (ppc405ep_cpc_t *cpc) #ifdef DEBUG_CLOCKS_LL printf("FWDA %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 16) & 0x7, D); #endif - VCO_out = cpc->sysclk * M * D; + VCO_out = (uint64_t)cpc->sysclk 
* M * D; if (VCO_out < 500000000UL || VCO_out > 1000000000UL) { /* Error - unlock the PLL */ printf("VCO out of range %" PRIu64 "\n", VCO_out); diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c index 683218e5c5..dc19682970 100644 --- a/hw/ppc/ppc4xx_pci.c +++ b/hw/ppc/ppc4xx_pci.c @@ -26,13 +26,7 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_host.h" #include "exec/address-spaces.h" - -#undef DEBUG -#ifdef DEBUG -#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) -#endif /* DEBUG */ +#include "trace.h" struct PCIMasterMap { uint32_t la; @@ -249,8 +243,7 @@ static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) { int slot = pci_dev->devfn >> 3; - DPRINTF("%s: devfn %x irq %d -> %d\n", __func__, - pci_dev->devfn, irq_num, slot); + trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot); return slot - 1; } @@ -259,7 +252,7 @@ static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) { qemu_irq *pci_irqs = opaque; - DPRINTF("%s: PCI irq %d\n", __func__, irq_num); + trace_ppc4xx_pci_set_irq(irq_num); if (irq_num < 0) { fprintf(stderr, "%s: PCI irq %d\n", __func__, irq_num); return; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e465d7ac98..81c6c1c27c 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -63,6 +63,7 @@ #include "qemu/error-report.h" #include "trace.h" #include "hw/nmi.h" +#include "hw/intc/intc.h" #include "hw/compat.h" #include "qemu/cutils.h" @@ -95,37 +96,68 @@ #define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) -static XICSState *try_create_xics(const char *type, int nr_servers, - int nr_irqs, Error **errp) +static int try_create_xics(sPAPRMachineState *spapr, const char *type_ics, + const char *type_icp, int nr_servers, + int nr_irqs, Error **errp) { - Error *err = NULL; - DeviceState *dev; + XICSFabric *xi = XICS_FABRIC(spapr); + Error *err = NULL, *local_err = NULL; + ICSState *ics = NULL; + int i; - dev = qdev_create(NULL, type); - qdev_prop_set_uint32(dev, "nr_servers", nr_servers); - qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs); - object_property_set_bool(OBJECT(dev), true, "realized", &err); + ics = ICS_SIMPLE(object_new(type_ics)); + qdev_set_parent_bus(DEVICE(ics), sysbus_get_default()); + object_property_add_child(OBJECT(spapr), "ics", OBJECT(ics), NULL); + object_property_set_int(OBJECT(ics), nr_irqs, "nr-irqs", &err); + object_property_add_const_link(OBJECT(ics), "xics", OBJECT(xi), NULL); + object_property_set_bool(OBJECT(ics), true, "realized", &local_err); + error_propagate(&err, local_err); if (err) { - error_propagate(errp, err); - object_unparent(OBJECT(dev)); - return NULL; + goto error; + } + + spapr->icps = g_malloc0(nr_servers * sizeof(ICPState)); + spapr->nr_servers = nr_servers; + + for (i = 0; i < nr_servers; i++) { + ICPState *icp = &spapr->icps[i]; + + object_initialize(icp, sizeof(*icp), type_icp); + qdev_set_parent_bus(DEVICE(icp), sysbus_get_default()); + object_property_add_child(OBJECT(spapr), "icp[*]", OBJECT(icp), NULL); + object_property_add_const_link(OBJECT(icp), "xics", OBJECT(xi), NULL); + object_property_set_bool(OBJECT(icp), true, "realized", &err); + if (err) { + goto error; + } + object_unref(OBJECT(icp)); + } + + spapr->ics = ics; + return 0; + +error: + error_propagate(errp, err); + if (ics) { + object_unparent(OBJECT(ics)); } - return XICS_COMMON(dev); + return -1; } -static XICSState *xics_system_init(MachineState *machine, - int nr_servers, int nr_irqs, Error **errp) +static int xics_system_init(MachineState *machine, + int nr_servers, int 
nr_irqs, Error **errp) { - XICSState *xics = NULL; + int rc = -1; if (kvm_enabled()) { Error *err = NULL; - if (machine_kernel_irqchip_allowed(machine)) { - xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs, - &err); + if (machine_kernel_irqchip_allowed(machine) && + !xics_kvm_init(SPAPR_MACHINE(machine), errp)) { + rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_KVM, + TYPE_KVM_ICP, nr_servers, nr_irqs, &err); } - if (machine_kernel_irqchip_required(machine) && !xics) { + if (machine_kernel_irqchip_required(machine) && rc < 0) { error_reportf_err(err, "kernel_irqchip requested but unavailable: "); } else { @@ -133,11 +165,13 @@ static XICSState *xics_system_init(MachineState *machine, } } - if (!xics) { - xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp); + if (rc < 0) { + xics_spapr_init(SPAPR_MACHINE(machine), errp); + rc = try_create_xics(SPAPR_MACHINE(machine), TYPE_ICS_SIMPLE, + TYPE_ICP, nr_servers, nr_irqs, errp); } - return xics; + return rc; } static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, @@ -924,7 +958,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); /* /interrupt controller */ - spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP); + spapr_dt_xics(spapr->nr_servers, fdt, PHANDLE_XICP); ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { @@ -958,7 +992,7 @@ static void *spapr_build_fdt(sPAPRMachineState *spapr, _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); } - if (mc->query_hotpluggable_cpus) { + if (mc->has_hotpluggable_cpus) { int offset = fdt_path_offset(fdt, "/cpus"); ret = spapr_drc_populate_dt(fdt, offset, NULL, SPAPR_DR_CONNECTOR_TYPE_CPU); @@ -1010,6 +1044,9 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, { CPUPPCState *env = &cpu->env; + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + if (msr_pr) { hcall_dprintf("Hypercall made with MSR[PR]=1\n"); env->gpr[3] = H_PRIVILEGE; @@ -1050,6 +1087,62 @@ static void close_htab_fd(sPAPRMachineState *spapr) spapr->htab_fd = -1; } +static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + + return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; +} + +static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + /* + * HTAB is controlled by KVM. Fetch into temporary buffer + */ + ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64); + kvmppc_read_hptes(hptes, ptex, n); + return hptes; + } + + /* + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. 
+ */ + return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset); +} + +static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + g_free((void *)hptes); + } + + /* Nothing to do for qemu managed HPT */ +} + +static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte0, uint64_t pte1) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + kvmppc_write_hpte(ptex, pte0, pte1); + } else { + stq_p(spapr->htab + offset, pte0); + stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1); + } +} + static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) { int shift; @@ -1249,6 +1342,13 @@ static int spapr_post_load(void *opaque, int version_id) sPAPRMachineState *spapr = (sPAPRMachineState *)opaque; int err = 0; + if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) { + int i; + for (i = 0; i < spapr->nr_servers; i++) { + icp_resend(&spapr->icps[i]); + } + } + /* In earlier versions, there was no separate qdev for the PAPR * RTC, so the RTC offset was stored directly in sPAPREnvironment. * So when migrating from those versions, poke the incoming offset @@ -1751,13 +1851,28 @@ static void spapr_validate_node_memory(MachineState *machine, Error **errp) } } +/* find cpu slot in machine->possible_cpus by core_id */ +static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +{ + int index = id / smp_threads; + + if (index >= ms->possible_cpus->len) { + return NULL; + } + if (idx) { + *idx = index; + } + return &ms->possible_cpus->cpus[index]; +} + static void spapr_init_cpus(sPAPRMachineState *spapr) { MachineState *machine = MACHINE(spapr); MachineClass *mc = MACHINE_GET_CLASS(machine); char *type = spapr_get_cpu_core_type(machine->cpu_model); int smt = kvmppc_smt_threads(); - int spapr_max_cores, spapr_cores; + const CPUArchIdList *possible_cpus; + int boot_cores_nr = smp_cpus / smp_threads; int i; if (!type) { @@ -1765,7 +1880,8 @@ static void spapr_init_cpus(sPAPRMachineState *spapr) exit(1); } - if (mc->query_hotpluggable_cpus) { + possible_cpus = mc->possible_cpu_arch_ids(machine); + if (mc->has_hotpluggable_cpus) { if (smp_cpus % smp_threads) { error_report("smp_cpus (%u) must be multiple of threads (%u)", smp_cpus, smp_threads); @@ -1776,24 +1892,18 @@ static void spapr_init_cpus(sPAPRMachineState *spapr) max_cpus, smp_threads); exit(1); } - - spapr_max_cores = max_cpus / smp_threads; - spapr_cores = smp_cpus / smp_threads; } else { if (max_cpus != smp_cpus) { error_report("This machine version does not support CPU hotplug"); exit(1); } - - spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads; - spapr_cores = spapr_max_cores; + boot_cores_nr = possible_cpus->len; } - spapr->cores = g_new0(Object *, spapr_max_cores); - for (i = 0; i < spapr_max_cores; i++) { + for (i = 0; i < possible_cpus->len; i++) { int core_id = i * smp_threads; - if (mc->query_hotpluggable_cpus) { + if (mc->has_hotpluggable_cpus) { sPAPRDRConnector *drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_CPU, @@ -1802,7 +1912,7 @@ static void spapr_init_cpus(sPAPRMachineState *spapr) qemu_register_reset(spapr_drc_reset, drc); } - if (i < spapr_cores) { + if (i < boot_cores_nr) { Object *core = object_new(type); int nr_threads = smp_threads; @@ -1889,9 +1999,8 @@ static void ppc_spapr_init(MachineState *machine) load_limit = MIN(spapr->rma_size, 
RTAS_MAX_ADDR) - FW_OVERHEAD; /* Set up Interrupt Controller before we create the VCPUs */ - spapr->xics = xics_system_init(machine, - DIV_ROUND_UP(max_cpus * smt, smp_threads), - XICS_IRQS_SPAPR, &error_fatal); + xics_system_init(machine, DIV_ROUND_UP(max_cpus * smt, smp_threads), + XICS_IRQS_SPAPR, &error_fatal); /* Set up containers for ibm,client-set-architecture negotiated options */ spapr->ov5 = spapr_ovec_new(); @@ -2357,6 +2466,7 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, uint64_t align = memory_region_get_alignment(mr); uint64_t size = memory_region_size(mr); uint64_t addr; + char *mem_dev; if (size % SPAPR_MEMORY_BLOCK_SIZE) { error_setg(&local_err, "Hotplugged memory size must be a multiple of " @@ -2364,6 +2474,13 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } + mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL); + if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) { + error_setg(&local_err, "Memory backend has bad page size. " + "Use 'memory-backend-file' with correct mem-path."); + goto out; + } + pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err); if (local_err) { goto out; @@ -2488,6 +2605,165 @@ void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, return fdt; } +static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + CPUCore *cc = CPU_CORE(dev); + CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + + core_slot->cpu = NULL; + object_unparent(OBJECT(dev)); +} + +static void spapr_core_release(DeviceState *dev, void *opaque) +{ + HotplugHandler *hotplug_ctrl; + + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +} + +static +void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + int index; + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + Error *local_err = NULL; + CPUCore *cc = CPU_CORE(dev); + int smt = kvmppc_smt_threads(); + + if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) { + error_setg(errp, "Unable to find CPU core with core-id: %d", + cc->core_id); + return; + } + if (index == 0) { + error_setg(errp, "Boot CPU core may not be unplugged"); + return; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_core_release, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + spapr_hotplug_req_remove_by_index(drc); +} + +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(spapr); + sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(dev); + CPUState *cs = CPU(core->threads); + sPAPRDRConnector *drc; + Error *local_err = NULL; + void *fdt = NULL; + int fdt_offset = 0; + int smt = kvmppc_smt_threads(); + CPUArchId *core_slot; + int index; + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + if (!core_slot) { + error_setg(errp, "Unable to find CPU core with core-id: %d", + cc->core_id); + return; + } + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); + + g_assert(drc || !mc->has_hotpluggable_cpus); + + /* + * Setup CPU DT 
entries only for hotplugged CPUs. For boot time or + * coldplugged CPUs DT entries are setup in spapr_build_fdt(). + */ + if (dev->hotplugged) { + fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); + } + + if (drc) { + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err); + if (local_err) { + g_free(fdt); + error_propagate(errp, local_err); + return; + } + } + + if (dev->hotplugged) { + /* + * Send hotplug notification interrupt to the guest only in case + * of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + /* + * Set the right DRC states for cold plugged CPU. + */ + if (drc) { + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); + drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); + } + } + core_slot->cpu = OBJECT(dev); +} + +static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + MachineState *machine = MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + Error *local_err = NULL; + CPUCore *cc = CPU_CORE(dev); + char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); + const char *type = object_get_typename(OBJECT(dev)); + CPUArchId *core_slot; + int index; + + if (dev->hotplugged && !mc->has_hotpluggable_cpus) { + error_setg(&local_err, "CPU hotplug not supported for this machine"); + goto out; + } + + if (strcmp(base_core_type, type)) { + error_setg(&local_err, "CPU core type should be %s", base_core_type); + goto out; + } + + if (cc->core_id % smp_threads) { + error_setg(&local_err, "invalid core id %d", cc->core_id); + goto out; + } + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + if (!core_slot) { + error_setg(&local_err, "core id %d out of range", cc->core_id); + goto out; + } + + if (core_slot->cpu) { + error_setg(&local_err, "core %d already populated", cc->core_id); + goto out; + } + +out: + g_free(base_core_type); + error_propagate(errp, local_err); +} + static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -2550,7 +2826,7 @@ static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, error_setg(errp, "Memory hot unplug not supported for this guest"); } } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { - if (!mc->query_hotpluggable_cpus) { + if (!mc->has_hotpluggable_cpus) { error_setg(errp, "CPU hot unplug not supported on this machine"); return; } @@ -2577,11 +2853,11 @@ static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, error_setg(errp, "Memory hot unplug not supported for this guest"); } } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { - if (!mc->query_hotpluggable_cpus) { + if (!mc->has_hotpluggable_cpus) { error_setg(errp, "CPU hot unplug not supported on this machine"); return; } - spapr_core_unplug(hotplug_dev, dev, errp); + spapr_core_unplug_request(hotplug_dev, dev, errp); } } @@ -2610,35 +2886,34 @@ static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index) return cpu_index / smp_threads / smp_cores; } -static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine) +static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine) { int i; - HotpluggableCPUList *head = NULL; - sPAPRMachineState *spapr = SPAPR_MACHINE(machine); int spapr_max_cores = max_cpus / smp_threads; + 
MachineClass *mc = MACHINE_GET_CLASS(machine); - for (i = 0; i < spapr_max_cores; i++) { - HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1); - HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); - CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1); + if (!mc->has_hotpluggable_cpus) { + spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads; + } + if (machine->possible_cpus) { + assert(machine->possible_cpus->len == spapr_max_cores); + return machine->possible_cpus; + } - cpu_item->type = spapr_get_cpu_core_type(machine->cpu_model); - cpu_item->vcpus_count = smp_threads; - cpu_props->has_core_id = true; - cpu_props->core_id = i * smp_threads; + machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + + sizeof(CPUArchId) * spapr_max_cores); + machine->possible_cpus->len = spapr_max_cores; + for (i = 0; i < machine->possible_cpus->len; i++) { + int core_id = i * smp_threads; + + machine->possible_cpus->cpus[i].vcpus_count = smp_threads; + machine->possible_cpus->cpus[i].arch_id = core_id; + machine->possible_cpus->cpus[i].props.has_core_id = true; + machine->possible_cpus->cpus[i].props.core_id = core_id; /* TODO: add 'has_node/node' here to describe to which node core belongs */ - - cpu_item->props = cpu_props; - if (spapr->cores[i]) { - cpu_item->has_qom_path = true; - cpu_item->qom_path = object_get_canonical_path(spapr->cores[i]); - } - list_item->value = cpu_item; - list_item->next = head; - head = list_item; } - return head; + return machine->possible_cpus; } static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, @@ -2693,6 +2968,40 @@ static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; } +static ICSState *spapr_ics_get(XICSFabric *dev, int irq) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(dev); + + return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL; +} + +static void spapr_ics_resend(XICSFabric *dev) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(dev); + + ics_resend(spapr->ics); +} + +static ICPState *spapr_icp_get(XICSFabric *xi, int server) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(xi); + + return (server < spapr->nr_servers) ? 
&spapr->icps[server] : NULL; +} + +static void spapr_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + int i; + + for (i = 0; i < spapr->nr_servers; i++) { + icp_pic_print_info(&spapr->icps[i], mon); + } + + ics_pic_print_info(spapr->ics, mon); +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2701,6 +3010,8 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) NMIClass *nc = NMI_CLASS(oc); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc); + XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); mc->desc = "pSeries Logical Partition (PAPR compliant)"; @@ -2712,7 +3023,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = 255; + mc->max_cpus = 1024; mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * M_BYTE; @@ -2724,15 +3035,24 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids; hc->unplug_request = spapr_machine_device_unplug_request; smc->dr_lmb_enabled = true; smc->tcg_default_cpu = "POWER8"; - mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus; + mc->has_hotpluggable_cpus = true; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; vhc->hypercall = emulate_spapr_hypercall; + vhc->hpt_mask = spapr_hpt_mask; + vhc->map_hptes = spapr_map_hptes; + vhc->unmap_hptes = spapr_unmap_hptes; + vhc->store_hpte = spapr_store_hpte; + xic->ics_get = spapr_ics_get; + xic->ics_resend = spapr_ics_resend; + xic->icp_get = spapr_icp_get; + ispc->print_info = spapr_pic_print_info; } static const TypeInfo spapr_machine_info = { @@ -2749,6 +3069,8 @@ static const TypeInfo spapr_machine_info = { { TYPE_NMI }, { TYPE_HOTPLUG_HANDLER }, { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_XICS_FABRIC }, + { TYPE_INTERRUPT_STATS_PROVIDER }, { } }, }; @@ -2928,7 +3250,7 @@ static void spapr_machine_2_6_instance_options(MachineState *machine) static void spapr_machine_2_6_class_options(MachineClass *mc) { spapr_machine_2_7_class_options(mc); - mc->query_hotpluggable_cpus = NULL; + mc->has_hotpluggable_cpus = false; SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6); } diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index 9dddaeb3fa..90d682fe33 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -13,10 +13,12 @@ #include "hw/boards.h" #include "qapi/error.h" #include "sysemu/cpus.h" +#include "sysemu/kvm.h" #include "target/ppc/kvm_ppc.h" #include "hw/ppc/ppc.h" #include "target/ppc/mmu-hash64.h" #include "sysemu/numa.h" +#include "qemu/error-report.h" static void spapr_cpu_reset(void *opaque) { @@ -34,15 +36,26 @@ static void spapr_cpu_reset(void *opaque) env->spr[SPR_HIOR] = 0; - ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift, - &error_fatal); + /* + * This is a hack for the benefit of KVM PR - it abuses the SDR1 + * slot in kvm_sregs to communicate the userspace address of the + * HPT + */ + if (kvm_enabled()) { + env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab + | 
(spapr->htab_shift - 18); + if (kvmppc_put_books_sregs(cpu) < 0) { + error_report("Unable to update SDR1 in KVM"); + exit(1); + } + } } static void spapr_cpu_destroy(PowerPCCPU *cpu) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - xics_cpu_destroy(spapr->xics, cpu); + xics_cpu_destroy(XICS_FABRIC(spapr), cpu); qemu_unregister_reset(spapr_cpu_reset, cpu); } @@ -57,8 +70,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); /* Enable PAPR mode in TCG or KVM */ - cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); - cpu_ppc_set_papr(cpu); + cpu_ppc_set_papr(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); if (cpu->max_compat) { Error *local_err = NULL; @@ -76,7 +88,7 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, cs->numa_node = i; } - xics_cpu_setup(spapr->xics, cpu); + xics_cpu_setup(XICS_FABRIC(spapr), cpu); qemu_register_reset(spapr_cpu_reset, cpu); spapr_cpu_reset(cpu); @@ -109,13 +121,12 @@ char *spapr_get_cpu_core_type(const char *model) return core_type; } -static void spapr_core_release(DeviceState *dev, void *opaque) +static void spapr_cpu_core_unrealizefn(DeviceState *dev, Error **errp) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); const char *typename = object_class_get_name(scc->cpu_class); size_t size = object_type_get_instance_size(typename); - sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUCore *cc = CPU_CORE(dev); int i; @@ -129,140 +140,7 @@ static void spapr_core_release(DeviceState *dev, void *opaque) cpu_remove_sync(cs); object_unparent(obj); } - - spapr->cores[cc->core_id / smp_threads] = NULL; - g_free(sc->threads); - object_unparent(OBJECT(dev)); -} - -void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) -{ - CPUCore *cc = CPU_CORE(dev); - int smt = kvmppc_smt_threads(); - int index = cc->core_id / smp_threads; - sPAPRDRConnector *drc = - spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); - sPAPRDRConnectorClass *drck; - Error *local_err = NULL; - - if (index == 0) { - error_setg(errp, "Boot CPU core may not be unplugged"); - return; - } - - g_assert(drc); - - drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->detach(drc, dev, spapr_core_release, NULL, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - spapr_hotplug_req_remove_by_index(drc); -} - -void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) -{ - sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); - MachineClass *mc = MACHINE_GET_CLASS(spapr); - sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev)); - CPUCore *cc = CPU_CORE(dev); - CPUState *cs = CPU(core->threads); - sPAPRDRConnector *drc; - Error *local_err = NULL; - void *fdt = NULL; - int fdt_offset = 0; - int index = cc->core_id / smp_threads; - int smt = kvmppc_smt_threads(); - - drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index * smt); - spapr->cores[index] = OBJECT(dev); - - g_assert(drc || !mc->query_hotpluggable_cpus); - - /* - * Setup CPU DT entries only for hotplugged CPUs. For boot time or - * coldplugged CPUs DT entries are setup in spapr_build_fdt(). 
- */ - if (dev->hotplugged) { - fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); - } - - if (drc) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, &local_err); - if (local_err) { - g_free(fdt); - spapr->cores[index] = NULL; - error_propagate(errp, local_err); - return; - } - } - - if (dev->hotplugged) { - /* - * Send hotplug notification interrupt to the guest only in case - * of hotplugged CPUs. - */ - spapr_hotplug_req_add_by_index(drc); - } else { - /* - * Set the right DRC states for cold plugged CPU. - */ - if (drc) { - sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); - drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); - } - } -} - -void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) -{ - MachineState *machine = MACHINE(OBJECT(hotplug_dev)); - MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); - sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); - int spapr_max_cores = max_cpus / smp_threads; - int index; - Error *local_err = NULL; - CPUCore *cc = CPU_CORE(dev); - char *base_core_type = spapr_get_cpu_core_type(machine->cpu_model); - const char *type = object_get_typename(OBJECT(dev)); - - if (dev->hotplugged && !mc->query_hotpluggable_cpus) { - error_setg(&local_err, "CPU hotplug not supported for this machine"); - goto out; - } - - if (strcmp(base_core_type, type)) { - error_setg(&local_err, "CPU core type should be %s", base_core_type); - goto out; - } - - if (cc->core_id % smp_threads) { - error_setg(&local_err, "invalid core id %d", cc->core_id); - goto out; - } - - index = cc->core_id / smp_threads; - if (index < 0 || index >= spapr_max_cores) { - error_setg(&local_err, "core id %d out of range", cc->core_id); - goto out; - } - - if (spapr->cores[index]) { - error_setg(&local_err, "core %d already populated", cc->core_id); - goto out; - } - -out: - g_free(base_core_type); - error_propagate(errp, local_err); } static void spapr_cpu_core_realize_child(Object *child, Error **errp) @@ -368,6 +246,7 @@ void spapr_cpu_core_class_init(ObjectClass *oc, void *data) sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc); dc->realize = spapr_cpu_core_realize; + dc->unrealize = spapr_cpu_core_unrealizefn; scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data); g_assert(scc->cpu_class); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index f85a9c32a7..24a5758e62 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -481,7 +481,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW))); } @@ -574,7 +574,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG))); } @@ -695,7 +695,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, spapr_event_sources_get_source(spapr->event_sources, i); g_assert(source->enabled); - qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq)); + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), source->irq)); } } @@ -752,7 +752,7 @@ void 
spapr_events_init(sPAPRMachineState *spapr) spapr->event_sources = spapr_event_sources_new(); spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, - xics_spapr_alloc(spapr->xics, 0, false, + spapr_ics_alloc(spapr->ics, 0, false, &error_fatal)); /* NOTE: if machine supports modern/dedicated hotplug event source, @@ -765,7 +765,7 @@ void spapr_events_init(sPAPRMachineState *spapr) */ if (spapr->use_hotplug_event_source) { spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, - xics_spapr_alloc(spapr->xics, 0, false, + spapr_ics_alloc(spapr->ics, 0, false, &error_fatal)); } diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 42d20e0b92..f05a90ed2c 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -47,12 +47,12 @@ static bool has_spr(PowerPCCPU *cpu, int spr) return cpu->env.spr_cb[spr].name != NULL; } -static inline bool valid_pte_index(CPUPPCState *env, target_ulong pte_index) +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) { /* - * hash value/pteg group index is normalized by htab_mask + * hash value/pteg group index is normalized by HPT mask */ - if (((pte_index & ~7ULL) / HPTES_PER_GROUP) & ~env->htab_mask) { + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { return false; } return true; @@ -77,15 +77,14 @@ static bool is_ram_address(sPAPRMachineState *spapr, hwaddr addr) static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { - CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong pteh = args[2]; target_ulong ptel = args[3]; unsigned apshift; target_ulong raddr; - target_ulong index; - uint64_t token; + target_ulong slot; + const ppc_hash_pte64_t *hptes; apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); if (!apshift) { @@ -116,36 +115,36 @@ static target_ulong h_enter(PowerPCCPU *cpu, sPAPRMachineState *spapr, pteh &= ~0x60ULL; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } - index = 0; + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + if (likely((flags & H_EXACT) == 0)) { - pte_index &= ~7ULL; - token = ppc_hash64_start_access(cpu, pte_index); - for (; index < 8; index++) { - if (!(ppc_hash64_load_hpte0(cpu, token, index) & HPTE64_V_VALID)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { break; } } - ppc_hash64_stop_access(cpu, token); - if (index == 8) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { return H_PTEG_FULL; } } else { - token = ppc_hash64_start_access(cpu, pte_index); - if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) { - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); return H_PTEG_FULL; } - ppc_hash64_stop_access(cpu, token); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); } - ppc_hash64_store_hpte(cpu, pte_index + index, - pteh | HPTE64_V_HPTE_DIRTY, ptel); + ppc_hash64_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); - args[0] = pte_index + index; + args[0] = ptex + slot; return H_SUCCESS; } @@ -161,18 +160,17 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, target_ulong ptex, target_ulong flags, target_ulong *vp, target_ulong *rp) { - CPUPPCState *env 
= &cpu->env; - uint64_t token; + const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_pte_index(env, ptex)) { + if (!valid_ptex(cpu, ptex)) { return REMOVE_PARM; } - token = ppc_hash64_start_access(cpu, ptex); - v = ppc_hash64_load_hpte0(cpu, token, 0); - r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || @@ -191,11 +189,11 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, { CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong avpn = args[2]; RemoveResult ret; - ret = remove_hpte(cpu, pte_index, avpn, flags, + ret = remove_hpte(cpu, ptex, avpn, flags, &args[0], &args[1]); switch (ret) { @@ -291,19 +289,19 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, { CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; target_ulong avpn = args[2]; - uint64_t token; + const ppc_hash_pte64_t *hptes; target_ulong v, r; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } - token = ppc_hash64_start_access(cpu, pte_index); - v = ppc_hash64_load_hpte0(cpu, token, 0); - r = ppc_hash64_load_hpte1(cpu, token, 0); - ppc_hash64_stop_access(cpu, token); + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); if ((v & HPTE64_V_VALID) == 0 || ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { @@ -315,36 +313,35 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, r |= (flags << 55) & HPTE64_R_PP0; r |= (flags << 48) & HPTE64_R_KEY_HI; r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); - ppc_hash64_store_hpte(cpu, pte_index, + ppc_hash64_store_hpte(cpu, ptex, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); - ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); /* Flush the tlb */ check_tlb_flush(env, true); /* Don't need a memory barrier, due to qemu's global lock */ - ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r); + ppc_hash64_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); return H_SUCCESS; } static target_ulong h_read(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { - CPUPPCState *env = &cpu->env; target_ulong flags = args[0]; - target_ulong pte_index = args[1]; + target_ulong ptex = args[1]; uint8_t *hpte; int i, ridx, n_entries = 1; - if (!valid_pte_index(env, pte_index)) { + if (!valid_ptex(cpu, ptex)) { return H_PARAMETER; } if (flags & H_READ_4) { /* Clear the two low order bits */ - pte_index &= ~(3ULL); + ptex &= ~(3ULL); n_entries = 4; } - hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64); + hpte = spapr->htab + (ptex * HASH_PTE_SIZE_64); for (i = 0, ridx = 0; i < n_entries; i++) { args[ridx++] = ldq_p(hpte); diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c index 3eb1d5976f..41df4c35ba 100644 --- a/hw/ppc/spapr_ovec.c +++ b/hw/ppc/spapr_ovec.c @@ -16,18 +16,9 @@ #include "qemu/bitmap.h" #include "exec/address-spaces.h" #include "qemu/error-report.h" +#include "trace.h" #include <libfdt.h> -/* #define 
DEBUG_SPAPR_OVEC */ - -#ifdef DEBUG_SPAPR_OVEC -#define DPRINTFN(fmt, ...) \ - do { fprintf(stderr, fmt "\n", ## __VA_ARGS__); } while (0) -#else -#define DPRINTFN(fmt, ...) \ - do { } while (0) -#endif - #define OV_MAXBYTES 256 /* not including length byte */ #define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE) @@ -210,8 +201,7 @@ sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector) for (i = 0; i < vector_len; i++) { uint8_t entry = ldub_phys(&address_space_memory, addr + i); if (entry) { - DPRINTFN("read guest vector %2d, byte %3d / %3d: 0x%.2x", - vector, i + 1, vector_len, entry); + trace_spapr_ovec_parse_vector(vector, i + 1, vector_len, entry); guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE); } } @@ -245,10 +235,9 @@ int spapr_ovec_populate_dt(void *fdt, int fdt_offset, for (i = 1; i < vec_len + 1; i++) { vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE); if (vec[i]) { - DPRINTFN("encoding guest vector byte %3d / %3d: 0x%.2x", - i, vec_len, vec[i]); + trace_spapr_ovec_populate_dt(i, vec_len, vec[i]); } } - return fdt_setprop(fdt, fdt_offset, name, vec, vec_len); + return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1); } diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index fd6fc1d953..2a3499eaf8 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -43,6 +43,7 @@ #include "hw/pci/pci_bridge.h" #include "hw/pci/pci_bus.h" +#include "hw/pci/pci_ids.h" #include "hw/ppc/spapr_drc.h" #include "sysemu/device_tree.h" #include "sysemu/kvm.h" @@ -325,7 +326,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - xics_spapr_free(spapr->xics, msi->first_irq, msi->num); + spapr_ics_free(spapr->ics, msi->first_irq, msi->num); if (msi_present(pdev)) { spapr_msi_setmsg(pdev, 0, false, 0, 0); } @@ -363,7 +364,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = xics_spapr_alloc_block(spapr->xics, req_num, false, + irq = spapr_ics_alloc_block(spapr->ics, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -374,7 +375,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* Release previous MSIs */ if (msi) { - xics_spapr_free(spapr->xics, msi->first_irq, msi->num); + spapr_ics_free(spapr->ics, msi->first_irq, msi->num); g_hash_table_remove(phb->msi, &config_addr); } @@ -736,7 +737,7 @@ static void spapr_msi_write(void *opaque, hwaddr addr, trace_spapr_pci_msi_write(addr, data, irq); - qemu_irq_pulse(xics_get_qirq(spapr->xics, irq)); + qemu_irq_pulse(xics_get_qirq(XICS_FABRIC(spapr), irq)); } static const MemoryRegionOps spapr_msi_ops = { @@ -946,6 +947,274 @@ static void populate_resource_props(PCIDevice *d, ResourceProps *rp) rp->assigned_len = assigned_idx * sizeof(ResourceFields); } +typedef struct PCIClass PCIClass; +typedef struct PCISubClass PCISubClass; +typedef struct PCIIFace PCIIFace; + +struct PCIIFace { + int iface; + const char *name; +}; + +struct PCISubClass { + int subclass; + const char *name; + const PCIIFace *iface; +}; + +struct PCIClass { + const char *name; + const PCISubClass *subc; +}; + +static const PCISubClass undef_subclass[] = { + { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mass_subclass[] = { + { PCI_CLASS_STORAGE_SCSI, "scsi", NULL }, + { PCI_CLASS_STORAGE_IDE, "ide", NULL }, + { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL }, + { PCI_CLASS_STORAGE_IPI, 
"ipi", NULL }, + { PCI_CLASS_STORAGE_RAID, "raid", NULL }, + { PCI_CLASS_STORAGE_ATA, "ata", NULL }, + { PCI_CLASS_STORAGE_SATA, "sata", NULL }, + { PCI_CLASS_STORAGE_SAS, "sas", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass net_subclass[] = { + { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL }, + { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL }, + { PCI_CLASS_NETWORK_FDDI, "fddi", NULL }, + { PCI_CLASS_NETWORK_ATM, "atm", NULL }, + { PCI_CLASS_NETWORK_ISDN, "isdn", NULL }, + { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL }, + { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass displ_subclass[] = { + { PCI_CLASS_DISPLAY_VGA, "vga", NULL }, + { PCI_CLASS_DISPLAY_XGA, "xga", NULL }, + { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass media_subclass[] = { + { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL }, + { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL }, + { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mem_subclass[] = { + { PCI_CLASS_MEMORY_RAM, "memory", NULL }, + { PCI_CLASS_MEMORY_FLASH, "flash", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass bridg_subclass[] = { + { PCI_CLASS_BRIDGE_HOST, "host", NULL }, + { PCI_CLASS_BRIDGE_ISA, "isa", NULL }, + { PCI_CLASS_BRIDGE_EISA, "eisa", NULL }, + { PCI_CLASS_BRIDGE_MC, "mca", NULL }, + { PCI_CLASS_BRIDGE_PCI, "pci", NULL }, + { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL }, + { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL }, + { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL }, + { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL }, + { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL }, + { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass comm_subclass[] = { + { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL }, + { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL }, + { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL }, + { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL }, + { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL }, + { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL }, + { 0xFF, NULL, NULL, }, +}; + +static const PCIIFace pic_iface[] = { + { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" }, + { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" }, + { 0xFF, NULL }, +}; + +static const PCISubClass sys_subclass[] = { + { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface }, + { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL }, + { PCI_CLASS_SYSTEM_TIMER, "timer", NULL }, + { PCI_CLASS_SYSTEM_RTC, "rtc", NULL }, + { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL }, + { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass inp_subclass[] = { + { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL }, + { PCI_CLASS_INPUT_PEN, "pen", NULL }, + { PCI_CLASS_INPUT_MOUSE, "mouse", NULL }, + { PCI_CLASS_INPUT_SCANNER, "scanner", NULL }, + { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass dock_subclass[] = { + { PCI_CLASS_DOCKING_GENERIC, "dock", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass cpu_subclass[] = { + { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL }, + { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL }, + { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL }, + { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIIFace usb_iface[] = { + { 
PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" }, + { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", }, + { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" }, + { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" }, + { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" }, + { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" }, + { 0xFF, NULL }, +}; + +static const PCISubClass ser_subclass[] = { + { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL }, + { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL }, + { PCI_CLASS_SERIAL_SSA, "ssa", NULL }, + { PCI_CLASS_SERIAL_USB, "usb", usb_iface }, + { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL }, + { PCI_CLASS_SERIAL_SMBUS, "smb", NULL }, + { PCI_CLASS_SERIAL_IB, "infiniband", NULL }, + { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL }, + { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL }, + { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass wrl_subclass[] = { + { PCI_CLASS_WIRELESS_IRDA, "irda", NULL }, + { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL }, + { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL }, + { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL }, + { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass sat_subclass[] = { + { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL }, + { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL }, + { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL }, + { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass crypt_subclass[] = { + { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL }, + { PCI_CLASS_CRYPT_ENTERTAINMENT, + "entertainment-encryption", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass spc_subclass[] = { + { PCI_CLASS_SP_DPIO, "dpio", NULL }, + { PCI_CLASS_SP_PERF, "counter", NULL }, + { PCI_CLASS_SP_SYNCH, "measurement", NULL }, + { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIClass pci_classes[] = { + { "legacy-device", undef_subclass }, + { "mass-storage", mass_subclass }, + { "network", net_subclass }, + { "display", displ_subclass, }, + { "multimedia-device", media_subclass }, + { "memory-controller", mem_subclass }, + { "unknown-bridge", bridg_subclass }, + { "communication-controller", comm_subclass}, + { "system-peripheral", sys_subclass }, + { "input-controller", inp_subclass }, + { "docking-station", dock_subclass }, + { "cpu", cpu_subclass }, + { "serial-bus", ser_subclass }, + { "wireless-controller", wrl_subclass }, + { "intelligent-io", NULL }, + { "satellite-device", sat_subclass }, + { "encryption", crypt_subclass }, + { "data-processing-controller", spc_subclass }, +}; + +static const char *pci_find_device_name(uint8_t class, uint8_t subclass, + uint8_t iface) +{ + const PCIClass *pclass; + const PCISubClass *psubclass; + const PCIIFace *piface; + const char *name; + + if (class >= ARRAY_SIZE(pci_classes)) { + return "pci"; + } + + pclass = pci_classes + class; + name = pclass->name; + + if (pclass->subc == NULL) { + return name; + } + + psubclass = pclass->subc; + while ((psubclass->subclass & 0xff) != 0xff) { + if ((psubclass->subclass & 0xff) == subclass) { + name = psubclass->name; + break; + } + psubclass++; + } + + piface = psubclass->iface; + if (piface == NULL) { + return name; + } + while ((piface->iface & 0xff) != 0xff) { + if ((piface->iface & 0xff) == iface) { + name = piface->name; + break; + } + piface++; + } + + return name; +} + +static void pci_get_node_name(char *nodename, int len, PCIDevice 
*dev) +{ + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + const char *name; + + name = pci_find_device_name((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, + ccode & 0xff); + + if (func != 0) { + snprintf(nodename, len, "%s@%x,%x", name, slot, func); + } else { + snprintf(nodename, len, "%s@%x", name, slot); + } +} + static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb, PCIDevice *pdev); @@ -957,6 +1226,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, int pci_status, err; char *buf = NULL; uint32_t drc_index = spapr_phb_get_pci_drc_index(sphb, dev); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); uint32_t max_msi, max_msix; if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) == @@ -971,8 +1241,7 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, pci_default_read_config(dev, PCI_DEVICE_ID, 2))); _FDT(fdt_setprop_cell(fdt, offset, "revision-id", pci_default_read_config(dev, PCI_REVISION_ID, 1))); - _FDT(fdt_setprop_cell(fdt, offset, "class-code", - pci_default_read_config(dev, PCI_CLASS_PROG, 3))); + _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) { _FDT(fdt_setprop_cell(fdt, offset, "interrupts", pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1))); @@ -1013,11 +1282,10 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset, _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); } - /* NOTE: this is normally generated by firmware via path/unit name, - * but in our case we must set it manually since it does not get - * processed by OF beforehand - */ - _FDT(fdt_setprop_string(fdt, offset, "name", "pci")); + _FDT(fdt_setprop_string(fdt, offset, "name", + pci_find_device_name((ccode >> 16) & 0xff, + (ccode >> 8) & 0xff, + ccode & 0xff))); buf = spapr_phb_get_loc_code(sphb, dev); if (!buf) { error_report("Failed setting the ibm,loc-code"); @@ -1061,15 +1329,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, void *fdt, int node_offset) { int offset, ret; - int slot = PCI_SLOT(dev->devfn); - int func = PCI_FUNC(dev->devfn); char nodename[FDT_NAME_MAX]; - if (func != 0) { - snprintf(nodename, FDT_NAME_MAX, "pci@%x,%x", slot, func); - } else { - snprintf(nodename, FDT_NAME_MAX, "pci@%x", slot); - } + pci_get_node_name(nodename, FDT_NAME_MAX, dev); offset = fdt_add_subnode(fdt, node_offset, nodename); ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb); @@ -1485,7 +1747,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err); + irq = spapr_ics_alloc_block(spapr->ics, 1, true, false, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); @@ -1782,9 +2044,9 @@ static void spapr_populate_pci_devices_dt(PCIBus *bus, PCIDevice *pdev, s_fdt.fdt = p->fdt; s_fdt.node_off = offset; s_fdt.sphb = p->sphb; - pci_for_each_device(sec_bus, pci_bus_num(sec_bus), - spapr_populate_pci_devices_dt, - &s_fdt); + pci_for_each_device_reverse(sec_bus, pci_bus_num(sec_bus), + spapr_populate_pci_devices_dt, + &s_fdt); } static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, @@ -1953,9 +2215,9 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, s_fdt.fdt = fdt; s_fdt.node_off = bus_off; s_fdt.sphb = phb; - pci_for_each_device(bus, 
pci_bus_num(bus), - spapr_populate_pci_devices_dt, - &s_fdt); + pci_for_each_device_reverse(bus, pci_bus_num(bus), + spapr_populate_pci_devices_dt, + &s_fdt); ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb), SPAPR_DR_CONNECTOR_TYPE_PCI); diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index 8bfc5f971f..a0ee4fd265 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -454,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err); + dev->irq = spapr_ics_alloc(spapr->ics, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index f46995cdb2..43d265f351 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -56,6 +56,10 @@ spapr_drc_realize_child(uint32_t index, char *childname) "drc: 0x%"PRIx32", chil spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32 spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32 +# hw/ppc/spapr_ovec.c +spapr_ovec_parse_vector(int vector, int byte, uint16_t vec_len, uint8_t entry) "read guest vector %2d, byte %3d / %3d: 0x%.2x" +spapr_ovec_populate_dt(int byte, uint16_t vec_len, uint8_t entry) "encoding guest vector byte %3d / %3d: 0x%.2x" + # hw/ppc/spapr_rtas.c spapr_rtas_set_indicator_invalid(uint32_t index) "sensor index: 0x%"PRIx32 spapr_rtas_set_indicator_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32 @@ -85,3 +89,11 @@ rs6000mc_presence_read(uint32_t addr, uint32_t val) "read addr=%x val=%x" rs6000mc_size_read(uint32_t addr, uint32_t val) "read addr=%x val=%x" rs6000mc_size_write(uint32_t addr, uint32_t val) "write addr=%x val=%x" rs6000mc_parity_read(uint32_t addr, uint32_t val) "read addr=%x val=%x" + +# hw/ppc/mac_newworld.c +mac99_uninorth_write(uint64_t addr, uint64_t value) "addr=0x%" PRIx64 " val=0x%"PRIx64 +mac99_uninorth_read(uint64_t addr, uint64_t value) "addr=0x%" PRIx64 " val=0x%"PRIx64 + +# hw/ppc/ppc4xx_pci.c +ppc4xx_pci_map_irq(int32_t devfn, int irq_num, int slot) "devfn %x irq %d -> %d" +ppc4xx_pci_set_irq(int irq_num) "PCI irq %d" diff --git a/hw/s390x/css.c b/hw/s390x/css.c index 0f2580d644..e32b2a4d42 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -368,13 +368,16 @@ static CCW1 copy_ccw_from_guest(hwaddr addr, bool fmt1) ret.cda = be32_to_cpu(tmp1.cda); } else { cpu_physical_memory_read(addr, &tmp0, sizeof(tmp0)); - ret.cmd_code = tmp0.cmd_code; - ret.flags = tmp0.flags; - ret.count = be16_to_cpu(tmp0.count); - ret.cda = be16_to_cpu(tmp0.cda1) | (tmp0.cda0 << 16); - if ((ret.cmd_code & 0x0f) == CCW_CMD_TIC) { - ret.cmd_code &= 0x0f; + if ((tmp0.cmd_code & 0x0f) == CCW_CMD_TIC) { + ret.cmd_code = CCW_CMD_TIC; + ret.flags = 0; + ret.count = 0; + } else { + ret.cmd_code = tmp0.cmd_code; + ret.flags = tmp0.flags; + ret.count = be16_to_cpu(tmp0.count); } + ret.cda = be16_to_cpu(tmp0.cda1) | (tmp0.cda0 << 16); } return ret; } diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 2e2664f22e..7978c7d52a 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -20,6 +20,7 @@ #include "hw/s390x/virtio-ccw.h" #include "hw/s390x/css.h" #include "ipl.h" +#include "qemu/error-report.h" #define KERN_IMAGE_START 0x010000UL #define KERN_PARM_AREA 0x010480UL @@ -209,6 +210,7 @@ static Property s390_ipl_properties[] = { DEFINE_PROP_STRING("initrd", S390IPLState, initrd), DEFINE_PROP_STRING("cmdline", S390IPLState, cmdline), DEFINE_PROP_STRING("firmware", S390IPLState, firmware), + 
DEFINE_PROP_STRING("netboot_fw", S390IPLState, netboot_fw), DEFINE_PROP_BOOL("enforce_bios", S390IPLState, enforce_bios, false), DEFINE_PROP_BOOL("iplbext_migration", S390IPLState, iplbext_migration, true), @@ -226,6 +228,12 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) TYPE_VIRTIO_CCW_DEVICE); SCSIDevice *sd = (SCSIDevice *) object_dynamic_cast(OBJECT(dev_st), TYPE_SCSI_DEVICE); + VirtIONet *vn = (VirtIONet *) object_dynamic_cast(OBJECT(dev_st), + TYPE_VIRTIO_NET); + + if (vn) { + ipl->netboot = true; + } if (virtio_ccw_dev) { CcwDevice *ccw_dev = CCW_DEVICE(virtio_ccw_dev); @@ -258,12 +266,86 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl) return false; } +static int load_netboot_image(Error **errp) +{ + S390IPLState *ipl = get_ipl_device(); + char *netboot_filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *mr = NULL; + void *ram_ptr = NULL; + int img_size = -1; + + mr = memory_region_find(sysmem, 0, 1).mr; + if (!mr) { + error_setg(errp, "Failed to find memory region at address 0"); + return -1; + } + + ram_ptr = memory_region_get_ram_ptr(mr); + if (!ram_ptr) { + error_setg(errp, "No RAM found"); + goto unref_mr; + } + + netboot_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, ipl->netboot_fw); + if (netboot_filename == NULL) { + error_setg(errp, "Could not find network bootloader"); + goto unref_mr; + } + + img_size = load_elf_ram(netboot_filename, NULL, NULL, &ipl->start_addr, + NULL, NULL, 1, EM_S390, 0, 0, NULL, false); + + if (img_size < 0) { + img_size = load_image_size(netboot_filename, ram_ptr, ram_size); + ipl->start_addr = KERN_IMAGE_START; + } + + if (img_size < 0) { + error_setg(errp, "Failed to load network bootloader"); + } + + g_free(netboot_filename); + +unref_mr: + memory_region_unref(mr); + return img_size; +} + +static bool is_virtio_net_device(IplParameterBlock *iplb) +{ + uint8_t cssid; + uint8_t ssid; + uint16_t devno; + uint16_t schid; + SubchDev *sch = NULL; + + if (iplb->pbt != S390_IPL_TYPE_CCW) { + return false; + } + + devno = be16_to_cpu(iplb->ccw.devno); + ssid = iplb->ccw.ssid & 3; + + for (schid = 0; schid < MAX_SCHID; schid++) { + for (cssid = 0; cssid < MAX_CSSID; cssid++) { + sch = css_find_subch(1, cssid, ssid, schid); + + if (sch && sch->devno == devno) { + return sch->id.cu_model == VIRTIO_ID_NET; + } + } + } + return false; +} + void s390_ipl_update_diag308(IplParameterBlock *iplb) { S390IPLState *ipl = get_ipl_device(); ipl->iplb = *iplb; ipl->iplb_valid = true; + ipl->netboot = is_virtio_net_device(iplb); } IplParameterBlock *s390_ipl_get_iplb(void) @@ -287,6 +369,7 @@ void s390_reipl_request(void) void s390_ipl_prepare_cpu(S390CPU *cpu) { S390IPLState *ipl = get_ipl_device(); + Error *err = NULL; cpu->env.psw.addr = ipl->start_addr; cpu->env.psw.mask = IPL_PSW_MASK; @@ -297,6 +380,13 @@ void s390_ipl_prepare_cpu(S390CPU *cpu) ipl->iplb_valid = s390_gen_initial_iplb(ipl); } } + if (ipl->netboot) { + if (load_netboot_image(&err) < 0) { + error_report_err(err); + vm_stop(RUN_STATE_INTERNAL_ERROR); + } + ipl->iplb.ccw.netboot_start_addr = ipl->start_addr; + } } static void s390_ipl_reset(DeviceState *dev) diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h index c89109585a..46930e4c64 100644 --- a/hw/s390x/ipl.h +++ b/hw/s390x/ipl.h @@ -16,7 +16,8 @@ #include "cpu.h" struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; @@ -100,12 +101,14 @@ struct S390IPLState { IplParameterBlock iplb; bool iplb_valid; bool reipl_requested; 
+ bool netboot; /*< public >*/ char *kernel; char *initrd; char *cmdline; char *firmware; + char *netboot_fw; uint8_t cssid; uint8_t ssid; uint16_t devno; diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index e9a676797a..40914fde6f 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -63,7 +63,7 @@ static int virtio_ccw_hcall_notify(const uint64_t *args) if (!sch || !css_subch_visible(sch)) { return -EINVAL; } - if (queue >= VIRTIO_CCW_QUEUE_MAX) { + if (queue >= VIRTIO_QUEUE_MAX) { return -EINVAL; } virtio_queue_notify(virtio_ccw_get_vdev(sch), queue); @@ -116,7 +116,8 @@ static void ccw_init(MachineState *machine) /* get a BUS */ css_bus = virtual_css_bus_init(); s390_init_ipl_dev(machine->kernel_filename, machine->kernel_cmdline, - machine->initrd_filename, "s390-ccw.img", true); + machine->initrd_filename, "s390-ccw.img", + "s390-netboot.img", true); s390_flic_init(); dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE); @@ -336,7 +337,12 @@ static const TypeInfo ccw_machine_info = { type_init(ccw_machine_register_##suffix) #define CCW_COMPAT_2_8 \ - HW_COMPAT_2_8 + HW_COMPAT_2_8 \ + {\ + .driver = TYPE_S390_FLIC_COMMON,\ + .property = "adapter_routes_max_batch",\ + .value = "64",\ + }, #define CCW_COMPAT_2_7 \ HW_COMPAT_2_7 diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 7a3a7fe5fd..afa4148e6b 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -44,16 +44,6 @@ #include "hw/s390x/ipl.h" #include "cpu.h" -//#define DEBUG_S390 - -#ifdef DEBUG_S390 -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif - #define MAX_BLK_DEVS 10 #define S390_TOD_CLOCK_VALUE_MISSING 0x00 @@ -75,6 +65,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios) { Object *new = object_new(TYPE_S390_IPL); @@ -88,6 +79,7 @@ void s390_init_ipl_dev(const char *kernel_filename, } qdev_prop_set_string(dev, "cmdline", kernel_cmdline); qdev_prop_set_string(dev, "firmware", firmware); + qdev_prop_set_string(dev, "netboot_fw", netboot_fw); qdev_prop_set_bit(dev, "enforce_bios", enforce_bios); object_property_add_child(qdev_get_machine(), TYPE_S390_IPL, new, NULL); diff --git a/hw/s390x/s390-virtio.h b/hw/s390x/s390-virtio.h index f588b80a6e..f2377a3e0e 100644 --- a/hw/s390x/s390-virtio.h +++ b/hw/s390x/s390-virtio.h @@ -24,6 +24,7 @@ void s390_init_ipl_dev(const char *kernel_filename, const char *kernel_cmdline, const char *initrd_filename, const char *firmware, + const char *netboot_fw, bool enforce_bios); void s390_create_virtio_net(BusState *bus, const char *name); void s390_nmi(NMIState *n, int cpu_index, Error **errp); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index 63c46373fb..00b3bde4e9 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -35,6 +35,8 @@ #include "trace.h" #include "hw/s390x/css-bridge.h" +#define NR_CLASSIC_INDICATOR_BITS 64 + static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size, VirtioCcwDevice *dev); @@ -126,7 +128,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info, uint16_t num = info ? info->num : linfo->num; uint64_t desc = info ? 
info->desc : linfo->queue; - if (index >= VIRTIO_CCW_QUEUE_MAX) { + if (index >= VIRTIO_QUEUE_MAX) { return -EINVAL; } @@ -162,7 +164,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info, virtio_queue_set_vector(vdev, index, index); } /* tell notify handler in case of config change */ - vdev->config_vector = VIRTIO_CCW_QUEUE_MAX; + vdev->config_vector = VIRTIO_QUEUE_MAX; return 0; } @@ -280,6 +282,15 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) ccw.cmd_code); check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC)); + if (dev->force_revision_1 && dev->revision < 0 && + ccw.cmd_code != CCW_CMD_SET_VIRTIO_REV) { + /* + * virtio-1 drivers must start with negotiating to a revision >= 1, + * so post a command reject for all other commands + */ + return -ENOSYS; + } + /* Look at the command. */ switch (ccw.cmd_code) { case CCW_CMD_SET_VQ: @@ -500,6 +511,11 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) ret = -ENOSYS; break; } + if (virtio_get_num_queues(vdev) > NR_CLASSIC_INDICATOR_BITS) { + /* More queues than indicator bits --> trigger a reject */ + ret = -ENOSYS; + break; + } if (!ccw.cda) { ret = -EFAULT; } else { @@ -549,7 +565,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) ccw.cda, MEMTXATTRS_UNSPECIFIED, NULL); - if (vq_config.index >= VIRTIO_CCW_QUEUE_MAX) { + if (vq_config.index >= VIRTIO_QUEUE_MAX) { ret = -EINVAL; break; } @@ -638,7 +654,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) * need to fetch it here. Nothing to do for now, though. */ if (dev->revision >= 0 || - revinfo.revision > virtio_ccw_rev_max(dev)) { + revinfo.revision > virtio_ccw_rev_max(dev) || + (dev->force_revision_1 && !revinfo.revision)) { ret = -ENOSYS; break; } @@ -669,6 +686,12 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) if (!sch) { return; } + if (!virtio_ccw_rev_max(dev) && dev->force_revision_1) { + error_setg(&err, "Invalid value of property max_rev " + "(is %d expected >= 1)", virtio_ccw_rev_max(dev)); + error_propagate(errp, err); + return; + } sch->driver_data = dev; sch->ccw_cb = virtio_ccw_cb; @@ -878,6 +901,24 @@ static void virtio_ccw_rng_realize(VirtioCcwDevice *ccw_dev, Error **errp) NULL); } +static void virtio_ccw_crypto_realize(VirtioCcwDevice *ccw_dev, Error **errp) +{ + VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(ccw_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + Error *err = NULL; + + qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + + object_property_set_link(OBJECT(vdev), + OBJECT(dev->vdev.conf.cryptodev), "cryptodev", + NULL); +} + /* DeviceState to VirtioCcwDevice. Note: used on datapath, * be careful and test performance if you change this. 
*/ @@ -919,11 +960,11 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector) uint64_t indicators; /* queue indicators + secondary indicators */ - if (vector >= VIRTIO_CCW_QUEUE_MAX + 64) { + if (vector >= VIRTIO_QUEUE_MAX + 64) { return; } - if (vector < VIRTIO_CCW_QUEUE_MAX) { + if (vector < VIRTIO_QUEUE_MAX) { if (!dev->indicators) { return; } @@ -1278,15 +1319,22 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) CcwDevice *ccw_dev = CCW_DEVICE(d); SubchDev *sch = ccw_dev->sch; int n = virtio_get_num_queues(vdev); + S390FLICState *flic = s390_get_flic(); if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) { dev->max_rev = 0; } - if (virtio_get_num_queues(vdev) > VIRTIO_CCW_QUEUE_MAX) { + if (virtio_get_num_queues(vdev) > VIRTIO_QUEUE_MAX) { + error_setg(errp, "The number of virtqueues %d " + "exceeds virtio limit %d", n, + VIRTIO_QUEUE_MAX); + return; + } + if (virtio_get_num_queues(vdev) > flic->adapter_routes_max_batch) { error_setg(errp, "The number of virtqueues %d " - "exceeds ccw limit %d", n, - VIRTIO_CCW_QUEUE_MAX); + "exceeds flic adapter route limit %d", n, + flic->adapter_routes_max_batch); return; } @@ -1518,6 +1566,48 @@ static const TypeInfo virtio_ccw_rng = { .class_init = virtio_ccw_rng_class_init, }; +static Property virtio_ccw_crypto_properties[] = { + DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), + DEFINE_PROP_BIT("ioeventfd", VirtioCcwDevice, flags, + VIRTIO_CCW_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, + VIRTIO_CCW_MAX_REV), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_ccw_crypto_instance_init(Object *obj) +{ + VirtIOCryptoCcw *dev = VIRTIO_CRYPTO_CCW(obj); + VirtioCcwDevice *ccw_dev = VIRTIO_CCW_DEVICE(obj); + + ccw_dev->force_revision_1 = true; + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_CRYPTO); + + object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev), + "cryptodev", &error_abort); +} + +static void virtio_ccw_crypto_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->realize = virtio_ccw_crypto_realize; + k->exit = virtio_ccw_exit; + dc->reset = virtio_ccw_reset; + dc->props = virtio_ccw_crypto_properties; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo virtio_ccw_crypto = { + .name = TYPE_VIRTIO_CRYPTO_CCW, + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VirtIOCryptoCcw), + .instance_init = virtio_ccw_crypto_instance_init, + .class_init = virtio_ccw_crypto_class_init, +}; + static void virtio_ccw_busdev_realize(DeviceState *dev, Error **errp) { VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev; @@ -1720,6 +1810,7 @@ static void virtio_ccw_register(void) #ifdef CONFIG_VHOST_VSOCK type_register_static(&vhost_vsock_ccw_info); #endif + type_register_static(&virtio_ccw_crypto); } type_init(virtio_ccw_register) diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h index 77d10f1671..41d4010378 100644 --- a/hw/s390x/virtio-ccw.h +++ b/hw/s390x/virtio-ccw.h @@ -22,6 +22,7 @@ #endif #include "hw/virtio/virtio-balloon.h" #include "hw/virtio/virtio-rng.h" +#include "hw/virtio/virtio-crypto.h" #include "hw/virtio/virtio-bus.h" #ifdef CONFIG_VHOST_VSOCK #include "hw/virtio/vhost-vsock.h" @@ -94,6 +95,7 @@ struct VirtioCcwDevice { IndAddr *indicators2; IndAddr *summary_indicator; uint64_t ind_bit; + bool force_revision_1; }; /* The maximum virtio revision we 
support. */ @@ -182,6 +184,17 @@ typedef struct VirtIORNGCcw { VirtIORNG vdev; } VirtIORNGCcw; +/* virtio-crypto-ccw */ + +#define TYPE_VIRTIO_CRYPTO_CCW "virtio-crypto-ccw" +#define VIRTIO_CRYPTO_CCW(obj) \ + OBJECT_CHECK(VirtIOCryptoCcw, (obj), TYPE_VIRTIO_CRYPTO_CCW) + +typedef struct VirtIOCryptoCcw { + VirtioCcwDevice parent_obj; + VirtIOCrypto vdev; +} VirtIOCryptoCcw; + VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch); #ifdef CONFIG_VIRTFS diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bbfb5dc289..a53f058621 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2240,7 +2240,7 @@ static void scsi_disk_resize_cb(void *opaque) } } -static void scsi_cd_change_media_cb(void *opaque, bool load) +static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; @@ -2328,7 +2328,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } } - blkconf_apply_backend_options(&dev->conf); + blkconf_apply_backend_options(&dev->conf, + blk_is_read_only(s->qdev.conf.blk), + dev->type == TYPE_DISK, &err); + if (err) { + error_propagate(errp, err); + return; + } if (s->qdev.conf.discard_granularity == -1) { s->qdev.conf.discard_granularity = @@ -2380,7 +2386,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); if (!dev->conf.blk) { - dev->conf.blk = blk_new(); + dev->conf.blk = blk_new(0, BLK_PERM_ALL); } s->qdev.blocksize = 2048; diff --git a/hw/sd/Makefile.objs b/hw/sd/Makefile.objs index 31c83308f2..c2b7664264 100644 --- a/hw/sd/Makefile.objs +++ b/hw/sd/Makefile.objs @@ -6,3 +6,4 @@ common-obj-$(CONFIG_SDHCI) += sdhci.o obj-$(CONFIG_MILKYMIST) += milkymist-memcard.o obj-$(CONFIG_OMAP) += omap_mmc.o obj-$(CONFIG_PXA2XX) += pxa2xx_mmci.o +obj-$(CONFIG_RASPI) += bcm2835_sdhost.o diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c new file mode 100644 index 0000000000..f7f4e656df --- /dev/null +++ b/hw/sd/bcm2835_sdhost.c @@ -0,0 +1,429 @@ +/* + * Raspberry Pi (BCM2835) SD Host Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "sysemu/blockdev.h" +#include "hw/sd/bcm2835_sdhost.h" + +#define TYPE_BCM2835_SDHOST_BUS "bcm2835-sdhost-bus" +#define BCM2835_SDHOST_BUS(obj) \ + OBJECT_CHECK(SDBus, (obj), TYPE_BCM2835_SDHOST_BUS) + +#define SDCMD 0x00 /* Command to SD card - 16 R/W */ +#define SDARG 0x04 /* Argument to SD card - 32 R/W */ +#define SDTOUT 0x08 /* Start value for timeout counter - 32 R/W */ +#define SDCDIV 0x0c /* Start value for clock divider - 11 R/W */ +#define SDRSP0 0x10 /* SD card rsp (31:0) - 32 R */ +#define SDRSP1 0x14 /* SD card rsp (63:32) - 32 R */ +#define SDRSP2 0x18 /* SD card rsp (95:64) - 32 R */ +#define SDRSP3 0x1c /* SD card rsp (127:96) - 32 R */ +#define SDHSTS 0x20 /* SD host status - 11 R */ +#define SDVDD 0x30 /* SD card power control - 1 R/W */ +#define SDEDM 0x34 /* Emergency Debug Mode - 13 R/W */ +#define SDHCFG 0x38 /* Host configuration - 2 R/W */ +#define SDHBCT 0x3c /* Host byte count (debug) - 32 R/W */ +#define SDDATA 0x40 /* Data to/from SD card - 32 R/W */ +#define SDHBLC 0x50 /* Host block count (SDIO/SDHC) - 9 R/W */ + +#define SDCMD_NEW_FLAG 0x8000 +#define SDCMD_FAIL_FLAG 0x4000 +#define SDCMD_BUSYWAIT 0x800 +#define SDCMD_NO_RESPONSE 0x400 +#define SDCMD_LONG_RESPONSE 0x200 +#define SDCMD_WRITE_CMD 0x80 +#define SDCMD_READ_CMD 0x40 +#define SDCMD_CMD_MASK 0x3f + +#define SDCDIV_MAX_CDIV 0x7ff + +#define SDHSTS_BUSY_IRPT 0x400 +#define SDHSTS_BLOCK_IRPT 0x200 +#define SDHSTS_SDIO_IRPT 0x100 +#define SDHSTS_REW_TIME_OUT 0x80 +#define SDHSTS_CMD_TIME_OUT 0x40 +#define SDHSTS_CRC16_ERROR 0x20 +#define SDHSTS_CRC7_ERROR 0x10 +#define SDHSTS_FIFO_ERROR 0x08 +/* Reserved */ +/* Reserved */ +#define SDHSTS_DATA_FLAG 0x01 + +#define SDHCFG_BUSY_IRPT_EN (1 << 10) +#define SDHCFG_BLOCK_IRPT_EN (1 << 8) +#define SDHCFG_SDIO_IRPT_EN (1 << 5) +#define SDHCFG_DATA_IRPT_EN (1 << 4) +#define SDHCFG_SLOW_CARD (1 << 3) +#define SDHCFG_WIDE_EXT_BUS (1 << 2) +#define SDHCFG_WIDE_INT_BUS (1 << 1) +#define SDHCFG_REL_CMD_LINE (1 << 0) + +#define SDEDM_FORCE_DATA_MODE (1 << 19) +#define SDEDM_CLOCK_PULSE (1 << 20) +#define SDEDM_BYPASS (1 << 21) + +#define SDEDM_WRITE_THRESHOLD_SHIFT 9 +#define SDEDM_READ_THRESHOLD_SHIFT 14 +#define SDEDM_THRESHOLD_MASK 0x1f + +#define SDEDM_FSM_MASK 0xf +#define SDEDM_FSM_IDENTMODE 0x0 +#define SDEDM_FSM_DATAMODE 0x1 +#define SDEDM_FSM_READDATA 0x2 +#define SDEDM_FSM_WRITEDATA 0x3 +#define SDEDM_FSM_READWAIT 0x4 +#define SDEDM_FSM_READCRC 0x5 +#define SDEDM_FSM_WRITECRC 0x6 +#define SDEDM_FSM_WRITEWAIT1 0x7 +#define SDEDM_FSM_POWERDOWN 0x8 +#define SDEDM_FSM_POWERUP 0x9 +#define SDEDM_FSM_WRITESTART1 0xa +#define SDEDM_FSM_WRITESTART2 0xb +#define SDEDM_FSM_GENPULSES 0xc +#define SDEDM_FSM_WRITEWAIT2 0xd +#define SDEDM_FSM_STARTPOWDOWN 0xf + +#define SDDATA_FIFO_WORDS 16 + +static void bcm2835_sdhost_update_irq(BCM2835SDHostState *s) +{ + uint32_t irq = s->status & + (SDHSTS_BUSY_IRPT | SDHSTS_BLOCK_IRPT | SDHSTS_SDIO_IRPT); + qemu_set_irq(s->irq, !!irq); +} + +static void bcm2835_sdhost_send_command(BCM2835SDHostState *s) +{ + SDRequest request; + uint8_t rsp[16]; + int rlen; + + request.cmd = s->cmd & SDCMD_CMD_MASK; + request.arg = s->cmdarg; + + rlen = sdbus_do_command(&s->sdbus, &request, rsp); + if (rlen < 0) { + goto error; + } + if (!(s->cmd & SDCMD_NO_RESPONSE)) { +#define RWORD(n) (((uint32_t)rsp[n] << 24) | (rsp[n + 1] << 16) \ + | (rsp[n + 2] << 8) | rsp[n + 3]) + if (rlen == 0 || (rlen == 4 && (s->cmd & SDCMD_LONG_RESPONSE))) { + goto error; + } + if (rlen != 4 && rlen != 16) 
{ + goto error; + } + if (rlen == 4) { + s->rsp[0] = RWORD(0); + s->rsp[1] = s->rsp[2] = s->rsp[3] = 0; + } else { + s->rsp[0] = RWORD(12); + s->rsp[1] = RWORD(8); + s->rsp[2] = RWORD(4); + s->rsp[3] = RWORD(0); + } +#undef RWORD + } + return; + +error: + s->cmd |= SDCMD_FAIL_FLAG; + s->status |= SDHSTS_CMD_TIME_OUT; +} + +static void bcm2835_sdhost_fifo_push(BCM2835SDHostState *s, uint32_t value) +{ + int n; + + if (s->fifo_len == BCM2835_SDHOST_FIFO_LEN) { + /* FIFO overflow */ + return; + } + n = (s->fifo_pos + s->fifo_len) & (BCM2835_SDHOST_FIFO_LEN - 1); + s->fifo_len++; + s->fifo[n] = value; +} + +static uint32_t bcm2835_sdhost_fifo_pop(BCM2835SDHostState *s) +{ + uint32_t value; + + if (s->fifo_len == 0) { + /* FIFO underflow */ + return 0; + } + value = s->fifo[s->fifo_pos]; + s->fifo_len--; + s->fifo_pos = (s->fifo_pos + 1) & (BCM2835_SDHOST_FIFO_LEN - 1); + return value; +} + +static void bcm2835_sdhost_fifo_run(BCM2835SDHostState *s) +{ + uint32_t value = 0; + int n; + int is_read; + + is_read = (s->cmd & SDCMD_READ_CMD) != 0; + if (s->datacnt != 0 && (!is_read || sdbus_data_ready(&s->sdbus))) { + if (is_read) { + n = 0; + while (s->datacnt && s->fifo_len < BCM2835_SDHOST_FIFO_LEN) { + value |= (uint32_t)sdbus_read_data(&s->sdbus) << (n * 8); + s->datacnt--; + n++; + if (n == 4) { + bcm2835_sdhost_fifo_push(s, value); + n = 0; + value = 0; + } + } + if (n != 0) { + bcm2835_sdhost_fifo_push(s, value); + } + } else { /* write */ + n = 0; + while (s->datacnt > 0 && (s->fifo_len > 0 || n > 0)) { + if (n == 0) { + value = bcm2835_sdhost_fifo_pop(s); + n = 4; + } + n--; + s->datacnt--; + sdbus_write_data(&s->sdbus, value & 0xff); + value >>= 8; + } + } + } + if (s->datacnt == 0) { + s->status |= SDHSTS_DATA_FLAG; + + s->edm &= ~0xf; + s->edm |= SDEDM_FSM_DATAMODE; + + if (s->config & SDHCFG_DATA_IRPT_EN) { + s->status |= SDHSTS_SDIO_IRPT; + } + + if ((s->cmd & SDCMD_BUSYWAIT) && (s->config & SDHCFG_BUSY_IRPT_EN)) { + s->status |= SDHSTS_BUSY_IRPT; + } + + if ((s->cmd & SDCMD_WRITE_CMD) && (s->config & SDHCFG_BLOCK_IRPT_EN)) { + s->status |= SDHSTS_BLOCK_IRPT; + } + + bcm2835_sdhost_update_irq(s); + } + + s->edm &= ~(0x1f << 4); + s->edm |= ((s->fifo_len & 0x1f) << 4); +} + +static uint64_t bcm2835_sdhost_read(void *opaque, hwaddr offset, + unsigned size) +{ + BCM2835SDHostState *s = (BCM2835SDHostState *)opaque; + uint32_t res = 0; + + switch (offset) { + case SDCMD: + res = s->cmd; + break; + case SDHSTS: + res = s->status; + break; + case SDRSP0: + res = s->rsp[0]; + break; + case SDRSP1: + res = s->rsp[1]; + break; + case SDRSP2: + res = s->rsp[2]; + break; + case SDRSP3: + res = s->rsp[3]; + break; + case SDEDM: + res = s->edm; + break; + case SDVDD: + res = s->vdd; + break; + case SDDATA: + res = bcm2835_sdhost_fifo_pop(s); + bcm2835_sdhost_fifo_run(s); + break; + case SDHBCT: + res = s->hbct; + break; + case SDHBLC: + res = s->hblc; + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + res = 0; + break; + } + + return res; +} + +static void bcm2835_sdhost_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + BCM2835SDHostState *s = (BCM2835SDHostState *)opaque; + + switch (offset) { + case SDCMD: + s->cmd = value; + if (value & SDCMD_NEW_FLAG) { + bcm2835_sdhost_send_command(s); + bcm2835_sdhost_fifo_run(s); + s->cmd &= ~SDCMD_NEW_FLAG; + } + break; + case SDTOUT: + break; + case SDCDIV: + break; + case SDHSTS: + s->status &= ~value; + bcm2835_sdhost_update_irq(s); + break; + case SDARG: + s->cmdarg = 
value; + break; + case SDEDM: + if ((value & 0xf) == 0xf) { + /* power down */ + value &= ~0xf; + } + s->edm = value; + break; + case SDHCFG: + s->config = value; + bcm2835_sdhost_fifo_run(s); + break; + case SDVDD: + s->vdd = value; + break; + case SDDATA: + bcm2835_sdhost_fifo_push(s, value); + bcm2835_sdhost_fifo_run(s); + break; + case SDHBCT: + s->hbct = value; + break; + case SDHBLC: + s->hblc = value; + s->datacnt = s->hblc * s->hbct; + bcm2835_sdhost_fifo_run(s); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %"HWADDR_PRIx"\n", + __func__, offset); + break; + } +} + +static const MemoryRegionOps bcm2835_sdhost_ops = { + .read = bcm2835_sdhost_read, + .write = bcm2835_sdhost_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_bcm2835_sdhost = { + .name = TYPE_BCM2835_SDHOST, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(cmd, BCM2835SDHostState), + VMSTATE_UINT32(cmdarg, BCM2835SDHostState), + VMSTATE_UINT32(status, BCM2835SDHostState), + VMSTATE_UINT32_ARRAY(rsp, BCM2835SDHostState, 4), + VMSTATE_UINT32(config, BCM2835SDHostState), + VMSTATE_UINT32(edm, BCM2835SDHostState), + VMSTATE_UINT32(vdd, BCM2835SDHostState), + VMSTATE_UINT32(hbct, BCM2835SDHostState), + VMSTATE_UINT32(hblc, BCM2835SDHostState), + VMSTATE_INT32(fifo_pos, BCM2835SDHostState), + VMSTATE_INT32(fifo_len, BCM2835SDHostState), + VMSTATE_UINT32_ARRAY(fifo, BCM2835SDHostState, BCM2835_SDHOST_FIFO_LEN), + VMSTATE_UINT32(datacnt, BCM2835SDHostState), + VMSTATE_END_OF_LIST() + } +}; + +static void bcm2835_sdhost_init(Object *obj) +{ + BCM2835SDHostState *s = BCM2835_SDHOST(obj); + + qbus_create_inplace(&s->sdbus, sizeof(s->sdbus), + TYPE_BCM2835_SDHOST_BUS, DEVICE(s), "sd-bus"); + + memory_region_init_io(&s->iomem, obj, &bcm2835_sdhost_ops, s, + TYPE_BCM2835_SDHOST, 0x1000); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); + sysbus_init_irq(SYS_BUS_DEVICE(s), &s->irq); +} + +static void bcm2835_sdhost_reset(DeviceState *dev) +{ + BCM2835SDHostState *s = BCM2835_SDHOST(dev); + + s->cmd = 0; + s->cmdarg = 0; + s->edm = 0x0000c60f; + s->config = 0; + s->hbct = 0; + s->hblc = 0; + s->datacnt = 0; + s->fifo_pos = 0; + s->fifo_len = 0; +} + +static void bcm2835_sdhost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = bcm2835_sdhost_reset; + dc->vmsd = &vmstate_bcm2835_sdhost; +} + +static TypeInfo bcm2835_sdhost_info = { + .name = TYPE_BCM2835_SDHOST, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(BCM2835SDHostState), + .class_init = bcm2835_sdhost_class_init, + .instance_init = bcm2835_sdhost_init, +}; + +static const TypeInfo bcm2835_sdhost_bus_info = { + .name = TYPE_BCM2835_SDHOST_BUS, + .parent = TYPE_SD_BUS, + .instance_size = sizeof(SDBus), +}; + +static void bcm2835_sdhost_register_types(void) +{ + type_register_static(&bcm2835_sdhost_info); + type_register_static(&bcm2835_sdhost_bus_info); +} + +type_init(bcm2835_sdhost_register_types) diff --git a/hw/sd/core.c b/hw/sd/core.c index 14c2bdf27b..295dc44ab7 100644 --- a/hw/sd/core.c +++ b/hw/sd/core.c @@ -131,6 +131,33 @@ void sdbus_set_readonly(SDBus *sdbus, bool readonly) } } +void sdbus_reparent_card(SDBus *from, SDBus *to) +{ + SDState *card = get_card(from); + SDCardClass *sc; + bool readonly; + + /* We directly reparent the card object rather than implementing this + * as a hotpluggable connection because we don't want to expose SD cards + * to users as being hotpluggable, and we can get 
away with it in this + * limited use case. This could perhaps be implemented more cleanly in + * future by adding support to the hotplug infrastructure for "device + * can be hotplugged only via code, not by user". + */ + + if (!card) { + return; + } + + sc = SD_CARD_GET_CLASS(card); + readonly = sc->get_readonly(card); + + sdbus_set_inserted(from, false); + qdev_set_parent_bus(DEVICE(card), &to->qbus); + sdbus_set_inserted(to, true); + sdbus_set_readonly(to, readonly); +} + static const TypeInfo sd_bus_info = { .name = TYPE_SD_BUS, .parent = TYPE_BUS, diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 8e88e8311a..ba47bff4db 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -458,7 +458,7 @@ static bool sd_get_readonly(SDState *sd) return sd->wp_switch; } -static void sd_cardchange(void *opaque, bool load) +static void sd_cardchange(void *opaque, bool load, Error **errp) { SDState *sd = opaque; DeviceState *dev = DEVICE(sd); @@ -1887,6 +1887,7 @@ static void sd_instance_finalize(Object *obj) static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); + int ret; if (sd->blk && blk_is_read_only(sd->blk)) { error_setg(errp, "Cannot use read-only drive as SD card"); @@ -1894,6 +1895,11 @@ static void sd_realize(DeviceState *dev, Error **errp) } if (sd->blk) { + ret = blk_set_perm(sd->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, + BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } blk_set_dev_ops(sd->blk, &sd_block_ops, sd); } } diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c index da32b5f709..6d6a791ee9 100644 --- a/hw/sd/sdhci.c +++ b/hw/sd/sdhci.c @@ -119,6 +119,7 @@ (SDHC_CAPAB_BASECLKFREQ << 8) | (SDHC_CAPAB_TOUNIT << 7) | \ (SDHC_CAPAB_TOCLKFREQ)) +#define MASK_TRNMOD 0x0037 #define MASKED_WRITE(reg, mask, val) (reg = (reg & (mask)) | (val)) static uint8_t sdhci_slotint(SDHCIState *s) @@ -486,6 +487,11 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) uint32_t boundary_chk = 1 << (((s->blksize & 0xf000) >> 12) + 12); uint32_t boundary_count = boundary_chk - (s->sdmasysad % boundary_chk); + if (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || !s->blkcnt) { + qemu_log_mask(LOG_UNIMP, "infinite transfer is not supported\n"); + return; + } + /* XXX: Some sd/mmc drivers (for example, u-boot-slp) do not account for * possible stop at page boundary if initial address is not page aligned, * allow them to work properly */ @@ -564,7 +570,6 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) } /* single block SDMA transfer */ - static void sdhci_sdma_transfer_single_block(SDHCIState *s) { int n; @@ -583,10 +588,7 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s) sdbus_write_data(&s->sdbus, s->fifo_buffer[n]); } } - - if (s->trnmod & SDHC_TRNS_BLK_CNT_EN) { - s->blkcnt--; - } + s->blkcnt--; sdhci_end_transfer(s); } @@ -797,11 +799,6 @@ static void sdhci_data_transfer(void *opaque) if (s->trnmod & SDHC_TRNS_DMA) { switch (SDHC_DMA_TYPE(s->hostctl)) { case SDHC_CTRL_SDMA: - if ((s->trnmod & SDHC_TRNS_MULTI) && - (!(s->trnmod & SDHC_TRNS_BLK_CNT_EN) || s->blkcnt == 0)) { - break; - } - if ((s->blkcnt == 1) || !(s->trnmod & SDHC_TRNS_MULTI)) { sdhci_sdma_transfer_single_block(s); } else { @@ -1022,7 +1019,11 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) /* Writing to last byte of sdmasysad might trigger transfer */ if (!(mask & 0xFF000000) && TRANSFERRING_DATA(s->prnsts) && s->blkcnt && s->blksize && SDHC_DMA_TYPE(s->hostctl) == SDHC_CTRL_SDMA) { - sdhci_sdma_transfer_multi_blocks(s); + if (s->trnmod & SDHC_TRNS_MULTI) { + 
sdhci_sdma_transfer_multi_blocks(s); + } else { + sdhci_sdma_transfer_single_block(s); + } } break; case SDHC_BLKSIZE: @@ -1050,7 +1051,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) if (!(s->capareg & SDHC_CAN_DO_DMA)) { value &= ~SDHC_TRNS_DMA; } - MASKED_WRITE(s->trnmod, mask, value); + MASKED_WRITE(s->trnmod, mask, value & MASK_TRNMOD); MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16); /* Writing to the upper byte of CMDREG triggers SD command generation */ diff --git a/hw/sparc64/niagara.c b/hw/sparc64/niagara.c index b55d4bb8d3..9a8d6109d4 100644 --- a/hw/sparc64/niagara.c +++ b/hw/sparc64/niagara.c @@ -35,6 +35,8 @@ #include "hw/timer/sun4v-rtc.h" #include "exec/address-spaces.h" #include "sysemu/block-backend.h" +#include "qemu/error-report.h" +#include "sysemu/qtest.h" typedef struct NiagaraBoardState { @@ -85,6 +87,17 @@ typedef struct NiagaraBoardState { #define NIAGARA_OBP_OFFSET 0x80000ULL #define PROM_SIZE_MAX (4 * 1024 * 1024) +static void add_rom_or_fail(const char *file, const hwaddr addr) +{ + /* XXX remove qtest_enabled() check once firmware files are + * in the qemu tree + */ + if (!qtest_enabled() && rom_add_file_fixed(file, addr, -1)) { + error_report("Unable to load a firmware for -M niagara"); + exit(1); + } + +} /* Niagara hardware initialisation */ static void niagara_init(MachineState *machine) { @@ -119,14 +132,13 @@ static void niagara_init(MachineState *machine) "sun4v.prom", PROM_SIZE_MAX); memory_region_add_subregion(sysmem, NIAGARA_PROM_BASE, &s->prom); - rom_add_file_fixed("nvram1", NIAGARA_NVRAM_BASE, -1); - rom_add_file_fixed("1up-md.bin", NIAGARA_MD_ROM_BASE, -1); - rom_add_file_fixed("1up-hv.bin", NIAGARA_HV_ROM_BASE, -1); + add_rom_or_fail("nvram1", NIAGARA_NVRAM_BASE); + add_rom_or_fail("1up-md.bin", NIAGARA_MD_ROM_BASE); + add_rom_or_fail("1up-hv.bin", NIAGARA_HV_ROM_BASE); - rom_add_file_fixed("reset.bin", NIAGARA_PROM_BASE, -1); - rom_add_file_fixed("q.bin", NIAGARA_PROM_BASE + NIAGARA_Q_OFFSET, -1); - rom_add_file_fixed("openboot.bin", NIAGARA_PROM_BASE + NIAGARA_OBP_OFFSET, - -1); + add_rom_or_fail("reset.bin", NIAGARA_PROM_BASE); + add_rom_or_fail("q.bin", NIAGARA_PROM_BASE + NIAGARA_Q_OFFSET); + add_rom_or_fail("openboot.bin", NIAGARA_PROM_BASE + NIAGARA_OBP_OFFSET); /* the virtual ramdisk is kind of initrd, but it resides outside of the partition RAM */ @@ -146,9 +158,10 @@ static void niagara_init(MachineState *machine) exit(1); } } - serial_mm_init(sysmem, NIAGARA_UART_BASE, 0, NULL, 115200, - serial_hds[0], DEVICE_BIG_ENDIAN); - + if (serial_hds[0]) { + serial_mm_init(sysmem, NIAGARA_UART_BASE, 0, NULL, 115200, + serial_hds[0], DEVICE_BIG_ENDIAN); + } empty_slot_init(NIAGARA_IOBBASE, NIAGARA_IOBSIZE); sun4v_rtc_init(NIAGARA_RTC_BASE); } diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs index fc9966880f..dd6f27e2a3 100644 --- a/hw/timer/Makefile.objs +++ b/hw/timer/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o common-obj-$(CONFIG_ARM_MPTIMER) += arm_mptimer.o +common-obj-$(CONFIG_ARM_V7M) += armv7m_systick.o common-obj-$(CONFIG_A9_GTIMER) += a9gtimer.o common-obj-$(CONFIG_CADENCE) += cadence_ttc.o common-obj-$(CONFIG_DS1338) += ds1338.o diff --git a/hw/timer/armv7m_systick.c b/hw/timer/armv7m_systick.c new file mode 100644 index 0000000000..df8d2804b3 --- /dev/null +++ b/hw/timer/armv7m_systick.c @@ -0,0 +1,240 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. 
+ * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#include "qemu/osdep.h" +#include "hw/timer/armv7m_systick.h" +#include "qemu-common.h" +#include "hw/sysbus.h" +#include "qemu/timer.h" +#include "qemu/log.h" +#include "trace.h" + +/* qemu timers run at 1GHz. We want something closer to 1MHz. */ +#define SYSTICK_SCALE 1000ULL + +#define SYSTICK_ENABLE (1 << 0) +#define SYSTICK_TICKINT (1 << 1) +#define SYSTICK_CLKSOURCE (1 << 2) +#define SYSTICK_COUNTFLAG (1 << 16) + +int system_clock_scale; + +/* Conversion factor from qemu timer to SysTick frequencies. */ +static inline int64_t systick_scale(SysTickState *s) +{ + if (s->control & SYSTICK_CLKSOURCE) { + return system_clock_scale; + } else { + return 1000; + } +} + +static void systick_reload(SysTickState *s, int reset) +{ + /* The Cortex-M3 Devices Generic User Guide says that "When the + * ENABLE bit is set to 1, the counter loads the RELOAD value from the + * SYST RVR register and then counts down". So, we need to check the + * ENABLE bit before reloading the value. + */ + trace_systick_reload(); + + if ((s->control & SYSTICK_ENABLE) == 0) { + return; + } + + if (reset) { + s->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + s->tick += (s->reload + 1) * systick_scale(s); + timer_mod(s->timer, s->tick); +} + +static void systick_timer_tick(void *opaque) +{ + SysTickState *s = (SysTickState *)opaque; + + trace_systick_timer_tick(); + + s->control |= SYSTICK_COUNTFLAG; + if (s->control & SYSTICK_TICKINT) { + /* Tell the NVIC to pend the SysTick exception */ + qemu_irq_pulse(s->irq); + } + if (s->reload == 0) { + s->control &= ~SYSTICK_ENABLE; + } else { + systick_reload(s, 0); + } +} + +static uint64_t systick_read(void *opaque, hwaddr addr, unsigned size) +{ + SysTickState *s = opaque; + uint32_t val; + + switch (addr) { + case 0x0: /* SysTick Control and Status. */ + val = s->control; + s->control &= ~SYSTICK_COUNTFLAG; + break; + case 0x4: /* SysTick Reload Value. */ + val = s->reload; + break; + case 0x8: /* SysTick Current Value. */ + { + int64_t t; + + if ((s->control & SYSTICK_ENABLE) == 0) { + val = 0; + break; + } + t = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (t >= s->tick) { + val = 0; + break; + } + val = ((s->tick - (t + 1)) / systick_scale(s)) + 1; + /* The interrupt is triggered when the timer reaches zero. + However the counter is not reloaded until the next clock + tick. This is a hack to return zero during the first tick. */ + if (val > s->reload) { + val = 0; + } + break; + } + case 0xc: /* SysTick Calibration Value. */ + val = 10000; + break; + default: + val = 0; + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad read offset 0x%" HWADDR_PRIx "\n", addr); + break; + } + + trace_systick_read(addr, val, size); + return val; +} + +static void systick_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + SysTickState *s = opaque; + + trace_systick_write(addr, value, size); + + switch (addr) { + case 0x0: /* SysTick Control and Status. 
*/ + { + uint32_t oldval = s->control; + + s->control &= 0xfffffff8; + s->control |= value & 7; + if ((oldval ^ value) & SYSTICK_ENABLE) { + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (value & SYSTICK_ENABLE) { + if (s->tick) { + s->tick += now; + timer_mod(s->timer, s->tick); + } else { + systick_reload(s, 1); + } + } else { + timer_del(s->timer); + s->tick -= now; + if (s->tick < 0) { + s->tick = 0; + } + } + } else if ((oldval ^ value) & SYSTICK_CLKSOURCE) { + /* This is a hack. Force the timer to be reloaded + when the reference clock is changed. */ + systick_reload(s, 1); + } + break; + } + case 0x4: /* SysTick Reload Value. */ + s->reload = value; + break; + case 0x8: /* SysTick Current Value. Writes reload the timer. */ + systick_reload(s, 1); + s->control &= ~SYSTICK_COUNTFLAG; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "SysTick: Bad write offset 0x%" HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps systick_ops = { + .read = systick_read, + .write = systick_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static void systick_reset(DeviceState *dev) +{ + SysTickState *s = SYSTICK(dev); + + s->control = 0; + s->reload = 0; + s->tick = 0; + timer_del(s->timer); +} + +static void systick_instance_init(Object *obj) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + SysTickState *s = SYSTICK(obj); + + memory_region_init_io(&s->iomem, obj, &systick_ops, s, "systick", 0xe0); + sysbus_init_mmio(sbd, &s->iomem); + sysbus_init_irq(sbd, &s->irq); + s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, systick_timer_tick, s); +} + +static const VMStateDescription vmstate_systick = { + .name = "armv7m_systick", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(control, SysTickState), + VMSTATE_UINT32(reload, SysTickState), + VMSTATE_INT64(tick, SysTickState), + VMSTATE_TIMER_PTR(timer, SysTickState), + VMSTATE_END_OF_LIST() + } +}; + +static void systick_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_systick; + dc->reset = systick_reset; +} + +static const TypeInfo armv7m_systick_info = { + .name = TYPE_SYSTICK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_init = systick_instance_init, + .instance_size = sizeof(SysTickState), + .class_init = systick_class_init, +}; + +static void armv7m_systick_register_types(void) +{ + type_register_static(&armv7m_systick_info); +} + +type_init(armv7m_systick_register_types) diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 010ccbf207..4b9b54bf2e 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -296,18 +296,23 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size) return reg_value; } -static void imx_gpt_reset(DeviceState *dev) -{ - IMXGPTState *s = IMX_GPT(dev); +static void imx_gpt_reset_common(IMXGPTState *s, bool is_soft_reset) +{ /* stop timer */ ptimer_stop(s->timer); - /* - * Soft reset doesn't touch some bits; hard reset clears them + /* Soft reset and hard reset differ only in their handling of the CR + * register -- soft reset preserves the values of some bits there. */ - s->cr &= ~(GPT_CR_EN|GPT_CR_ENMOD|GPT_CR_STOPEN|GPT_CR_DOZEN| - GPT_CR_WAITEN|GPT_CR_DBGEN); + if (is_soft_reset) { + /* Clear all CR bits except those that are preserved by soft reset. 
*/ + s->cr &= GPT_CR_EN | GPT_CR_ENMOD | GPT_CR_STOPEN | GPT_CR_DOZEN | + GPT_CR_WAITEN | GPT_CR_DBGEN | + (GPT_CR_CLKSRC_MASK << GPT_CR_CLKSRC_SHIFT); + } else { + s->cr = 0; + } s->sr = 0; s->pr = 0; s->ir = 0; @@ -333,6 +338,18 @@ static void imx_gpt_reset(DeviceState *dev) } } +static void imx_gpt_soft_reset(DeviceState *dev) +{ + IMXGPTState *s = IMX_GPT(dev); + imx_gpt_reset_common(s, true); +} + +static void imx_gpt_reset(DeviceState *dev) +{ + IMXGPTState *s = IMX_GPT(dev); + imx_gpt_reset_common(s, false); +} + static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { @@ -348,7 +365,7 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, s->cr = value & ~0x7c14; if (s->cr & GPT_CR_SWR) { /* force reset */ /* handle the reset */ - imx_gpt_reset(DEVICE(s)); + imx_gpt_soft_reset(DEVICE(s)); } else { /* set our freq, as the source might have changed */ imx_gpt_set_freq(s); diff --git a/hw/timer/mips_gictimer.c b/hw/timer/mips_gictimer.c index 3698889475..f5c5806724 100644 --- a/hw/timer/mips_gictimer.c +++ b/hw/timer/mips_gictimer.c @@ -14,6 +14,11 @@ #define TIMER_PERIOD 10 /* 10 ns period for 100 Mhz frequency */ +uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic) +{ + return NANOSECONDS_PER_SECOND / TIMER_PERIOD; +} + static void gic_vptimer_update(MIPSGICTimerState *gictimer, uint32_t vp_index, uint64_t now) { diff --git a/hw/timer/trace-events b/hw/timer/trace-events index 3495c41c18..d17cfe6b39 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -49,3 +49,9 @@ aspeed_timer_ctrl_pulse_enable(uint8_t i, bool enable) "Timer %" PRIu8 ": %d" aspeed_timer_set_ctrl2(uint32_t value) "Value: 0x%" PRIx32 aspeed_timer_set_value(int timer, int reg, uint32_t value) "Timer %d register %d: 0x%" PRIx32 aspeed_timer_read(uint64_t offset, unsigned size, uint64_t value) "From 0x%" PRIx64 ": of size %u: 0x%" PRIx64 + +# hw/timer/armv7m_systick.c +systick_reload(void) "systick reload" +systick_timer_tick(void) "systick timer tick" +systick_read(uint64_t addr, uint32_t value, unsigned size) "systick read addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" +systick_write(uint64_t addr, uint32_t value, unsigned size) "systick write addr 0x%" PRIx64 " data 0x%" PRIx32 " size %u" diff --git a/hw/usb/bus.c b/hw/usb/bus.c index 1dcc35c8f8..24f1608b4b 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -8,7 +8,6 @@ #include "monitor/monitor.h" #include "trace.h" #include "qemu/cutils.h" -#include "migration/migration.h" static void usb_bus_dev_print(Monitor *mon, DeviceState *qdev, int indent); @@ -136,11 +135,12 @@ USBDevice *usb_device_find_device(USBDevice *dev, uint8_t addr) return NULL; } -static void usb_device_handle_destroy(USBDevice *dev) +static void usb_device_unrealize(USBDevice *dev, Error **errp) { USBDeviceClass *klass = USB_DEVICE_GET_CLASS(dev); - if (klass->handle_destroy) { - klass->handle_destroy(dev); + + if (klass->unrealize) { + klass->unrealize(dev, errp); + } } @@ -291,7 +291,7 @@ static void usb_qdev_unrealize(DeviceState *qdev, Error **errp) if (dev->attached) { usb_device_detach(dev); } - usb_device_handle_destroy(dev); + usb_device_unrealize(dev, errp); if (dev->port) { usb_release_port(dev); } @@ -687,8 +687,6 @@ USBDevice *usbdevice_create(const char *cmdline) const char *params; int len; USBDevice *dev; - ObjectClass *klass; - DeviceClass *dc; params = strchr(cmdline,':'); if (params) { @@ -723,22 +721,6 @@ USBDevice *usbdevice_create(const char *cmdline) return NULL; } - klass = object_class_by_name(f->name); - if (klass 
== NULL) { - error_report("Device '%s' not found", f->name); - return NULL; - } - - dc = DEVICE_CLASS(klass); - - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_report("Device %s is not migratable, but --only-migratable " - "was specified", f->name); - return NULL; - } - } - if (f->usbdevice_init) { dev = f->usbdevice_init(bus, params); } else { diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c index 87cab0a3d1..343345235c 100644 --- a/hw/usb/dev-audio.c +++ b/hw/usb/dev-audio.c @@ -617,7 +617,7 @@ static void usb_audio_handle_data(USBDevice *dev, USBPacket *p) } } -static void usb_audio_handle_destroy(USBDevice *dev) +static void usb_audio_unrealize(USBDevice *dev, Error **errp) { USBAudioState *s = USB_AUDIO(dev); @@ -683,7 +683,7 @@ static void usb_audio_class_init(ObjectClass *klass, void *data) k->handle_reset = usb_audio_handle_reset; k->handle_control = usb_audio_handle_control; k->handle_data = usb_audio_handle_data; - k->handle_destroy = usb_audio_handle_destroy; + k->unrealize = usb_audio_unrealize; k->set_interface = usb_audio_set_interface; } diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c index 91a4a0b8b9..443e3c301d 100644 --- a/hw/usb/dev-bluetooth.c +++ b/hw/usb/dev-bluetooth.c @@ -496,7 +496,7 @@ static void usb_bt_out_hci_packet_acl(void *opaque, usb_bt_fifo_enqueue(&s->acl, data, len); } -static void usb_bt_handle_destroy(USBDevice *dev) +static void usb_bt_unrealize(USBDevice *dev, Error **errp) { struct USBBtState *s = (struct USBBtState *) dev->opaque; @@ -559,7 +559,7 @@ static void usb_bt_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usb_bt_handle_reset; uc->handle_control = usb_bt_handle_control; uc->handle_data = usb_bt_handle_data; - uc->handle_destroy = usb_bt_handle_destroy; + uc->unrealize = usb_bt_unrealize; dc->vmsd = &vmstate_usb_bt; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); } diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c index dda0bf0df0..c40019df96 100644 --- a/hw/usb/dev-hid.c +++ b/hw/usb/dev-hid.c @@ -690,7 +690,7 @@ static void usb_hid_handle_data(USBDevice *dev, USBPacket *p) } } -static void usb_hid_handle_destroy(USBDevice *dev) +static void usb_hid_unrealize(USBDevice *dev, Error **errp) { USBHIDState *us = USB_HID(dev); @@ -785,7 +785,7 @@ static void usb_hid_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usb_hid_handle_reset; uc->handle_control = usb_hid_handle_control; uc->handle_data = usb_hid_handle_data; - uc->handle_destroy = usb_hid_handle_destroy; + uc->unrealize = usb_hid_unrealize; uc->handle_attach = usb_desc_attach; } diff --git a/hw/usb/dev-hub.c b/hw/usb/dev-hub.c index a33f21cb38..9fe7333946 100644 --- a/hw/usb/dev-hub.c +++ b/hw/usb/dev-hub.c @@ -497,7 +497,7 @@ static void usb_hub_handle_data(USBDevice *dev, USBPacket *p) } } -static void usb_hub_handle_destroy(USBDevice *dev) +static void usb_hub_unrealize(USBDevice *dev, Error **errp) { USBHubState *s = (USBHubState *)dev; int i; @@ -575,7 +575,7 @@ static void usb_hub_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usb_hub_handle_reset; uc->handle_control = usb_hub_handle_control; uc->handle_data = usb_hub_handle_data; - uc->handle_destroy = usb_hub_handle_destroy; + uc->unrealize = usb_hub_unrealize; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->fw_name = "hub"; dc->vmsd = &vmstate_usb_hub; diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c index c0f1193ba9..85fc81bf43 100644 --- a/hw/usb/dev-network.c +++ b/hw/usb/dev-network.c @@ -1324,7 +1324,7 @@ static void 
usbnet_cleanup(NetClientState *nc) s->nic = NULL; } -static void usb_net_handle_destroy(USBDevice *dev) +static void usb_net_unrealize(USBDevice *dev, Error **errp) { USBNetState *s = (USBNetState *) dev; @@ -1428,7 +1428,7 @@ static void usb_net_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usb_net_handle_reset; uc->handle_control = usb_net_handle_control; uc->handle_data = usb_net_handle_data; - uc->handle_destroy = usb_net_handle_destroy; + uc->unrealize = usb_net_unrealize; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); dc->fw_name = "network"; dc->vmsd = &vmstate_usb_net; diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index 7cd4ed0d17..757b8b3f5a 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -1163,7 +1163,7 @@ static void ccid_handle_data(USBDevice *dev, USBPacket *p) } } -static void ccid_handle_destroy(USBDevice *dev) +static void ccid_unrealize(USBDevice *dev, Error **errp) { USBCCIDState *s = USB_CCID_DEV(dev); @@ -1470,7 +1470,7 @@ static void ccid_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = ccid_handle_reset; uc->handle_control = ccid_handle_control; uc->handle_data = ccid_handle_data; - uc->handle_destroy = ccid_handle_destroy; + uc->unrealize = ccid_unrealize; dc->desc = "CCID Rev 1.1 smartcard reader"; dc->vmsd = &ccid_vmstate; dc->props = ccid_properties; diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index c607f7606d..8a61ec94c8 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -589,6 +589,13 @@ static const struct SCSIBusInfo usb_msd_scsi_info_bot = { .load_request = usb_msd_load_request, }; +static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp) +{ + MSDState *s = USB_STORAGE_DEV(dev); + + object_unref(OBJECT(&s->bus)); +} + static void usb_msd_realize_storage(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -603,7 +610,11 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) blkconf_serial(&s->conf, &dev->serial); blkconf_blocksizes(&s->conf); - blkconf_apply_backend_options(&s->conf); + blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err); + if (err) { + error_propagate(errp, err); + return; + } /* * Hack alert: this pretends to be a block device, but it's really @@ -635,6 +646,13 @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp) s->scsi_dev = scsi_dev; } +static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp) +{ + MSDState *s = USB_STORAGE_DEV(dev); + + object_unref(OBJECT(&s->bus)); +} + static void usb_msd_realize_bot(USBDevice *dev, Error **errp) { MSDState *s = USB_STORAGE_DEV(dev); @@ -755,6 +773,7 @@ static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_realize_storage; + uc->unrealize = usb_msd_unrealize_storage; dc->props = msd_properties; } @@ -817,6 +836,7 @@ static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data) USBDeviceClass *uc = USB_DEVICE_CLASS(klass); uc->realize = usb_msd_realize_bot; + uc->unrealize = usb_msd_unrealize_bot; uc->attached_settable = true; } diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c index da2fb7017e..fffc424396 100644 --- a/hw/usb/dev-uas.c +++ b/hw/usb/dev-uas.c @@ -891,11 +891,13 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) } } -static void usb_uas_handle_destroy(USBDevice *dev) +static void usb_uas_unrealize(USBDevice *dev, Error **errp) { UASDevice *uas = USB_UAS(dev); 
qemu_bh_delete(uas->status_bh); + + object_unref(OBJECT(&uas->bus)); } static void usb_uas_realize(USBDevice *dev, Error **errp) @@ -944,7 +946,7 @@ static void usb_uas_class_initfn(ObjectClass *klass, void *data) uc->handle_reset = usb_uas_handle_reset; uc->handle_control = usb_uas_handle_control; uc->handle_data = usb_uas_handle_data; - uc->handle_destroy = usb_uas_handle_destroy; + uc->unrealize = usb_uas_unrealize; uc->attached_settable = true; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->fw_name = "storage"; diff --git a/hw/usb/dev-wacom.c b/hw/usb/dev-wacom.c index c4702dbba0..bf70013059 100644 --- a/hw/usb/dev-wacom.c +++ b/hw/usb/dev-wacom.c @@ -329,7 +329,7 @@ static void usb_wacom_handle_data(USBDevice *dev, USBPacket *p) } } -static void usb_wacom_handle_destroy(USBDevice *dev) +static void usb_wacom_unrealize(USBDevice *dev, Error **errp) { USBWacomState *s = (USBWacomState *) dev; @@ -364,7 +364,7 @@ static void usb_wacom_class_init(ObjectClass *klass, void *data) uc->handle_reset = usb_wacom_handle_reset; uc->handle_control = usb_wacom_handle_control; uc->handle_data = usb_wacom_handle_data; - uc->handle_destroy = usb_wacom_handle_destroy; + uc->unrealize = usb_wacom_unrealize; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); dc->desc = "QEMU PenPartner Tablet"; dc->vmsd = &vmstate_usb_wacom; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 21c93e0372..fe8406ac64 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -1001,7 +1001,7 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) if (ohci_read_td(ohci, addr, &td)) { trace_usb_ohci_td_read_error(addr); ohci_die(ohci); - return 0; + return 1; } dir = OHCI_BM(ed->flags, ED_D); diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 28dd2f2c9a..f0af852709 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -635,6 +635,11 @@ static bool xhci_get_flag(XHCIState *xhci, enum xhci_flags bit) return xhci->flags & (1 << bit); } +static void xhci_set_flag(XHCIState *xhci, enum xhci_flags bit) +{ + xhci->flags |= (1 << bit); +} + static uint64_t xhci_mfindex_get(XHCIState *xhci) { int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -3839,17 +3844,21 @@ static const VMStateDescription vmstate_xhci = { } }; -static Property xhci_properties[] = { +static Property nec_xhci_properties[] = { DEFINE_PROP_ON_OFF_AUTO("msi", XHCIState, msi, ON_OFF_AUTO_AUTO), DEFINE_PROP_ON_OFF_AUTO("msix", XHCIState, msix, ON_OFF_AUTO_AUTO), DEFINE_PROP_BIT("superspeed-ports-first", XHCIState, flags, XHCI_FLAG_SS_FIRST, true), DEFINE_PROP_BIT("force-pcie-endcap", XHCIState, flags, XHCI_FLAG_FORCE_PCIE_ENDCAP, false), - DEFINE_PROP_BIT("streams", XHCIState, flags, - XHCI_FLAG_ENABLE_STREAMS, true), DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS), DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS), + DEFINE_PROP_END_OF_LIST(), +}; + +static Property xhci_properties[] = { + DEFINE_PROP_BIT("streams", XHCIState, flags, + XHCI_FLAG_ENABLE_STREAMS, true), DEFINE_PROP_UINT32("p2", XHCIState, numports_2, 4), DEFINE_PROP_UINT32("p3", XHCIState, numports_3, 4), DEFINE_PROP_END_OF_LIST(), @@ -3881,7 +3890,9 @@ static const TypeInfo xhci_info = { static void nec_xhci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + dc->props = nec_xhci_properties; k->vendor_id = PCI_VENDOR_ID_NEC; k->device_id = PCI_DEVICE_ID_NEC_UPD720200; k->revision = 0x03; @@ -3902,10 +3913,22 @@ static void qemu_xhci_class_init(ObjectClass *klass, void 
*data) k->revision = 0x01; } +static void qemu_xhci_instance_init(Object *obj) +{ + XHCIState *xhci = XHCI(obj); + + xhci->msi = ON_OFF_AUTO_OFF; + xhci->msix = ON_OFF_AUTO_AUTO; + xhci->numintrs = MAXINTRS; + xhci->numslots = MAXSLOTS; + xhci_set_flag(xhci, XHCI_FLAG_SS_FIRST); +} + static const TypeInfo qemu_xhci_info = { .name = TYPE_QEMU_XHCI, .parent = TYPE_XHCI, .class_init = qemu_xhci_class_init, + .instance_init = qemu_xhci_instance_init, }; static void xhci_register_types(void) diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index 7791c6d520..c9876a5b0f 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -1065,7 +1065,7 @@ static void usb_host_instance_init(Object *obj) &udev->qdev, NULL); } -static void usb_host_handle_destroy(USBDevice *udev) +static void usb_host_unrealize(USBDevice *udev, Error **errp) { USBHostDevice *s = USB_HOST_DEVICE(udev); @@ -1568,7 +1568,7 @@ static void usb_host_class_initfn(ObjectClass *klass, void *data) uc->handle_data = usb_host_handle_data; uc->handle_control = usb_host_handle_control; uc->handle_reset = usb_host_handle_reset; - uc->handle_destroy = usb_host_handle_destroy; + uc->unrealize = usb_host_unrealize; uc->flush_ep_queue = usb_host_flush_ep_queue; uc->alloc_streams = usb_host_alloc_streams; uc->free_streams = usb_host_free_streams; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 860f5c35eb..0efe62f725 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1427,7 +1427,7 @@ static void usbredir_cleanup_device_queues(USBRedirDevice *dev) } } -static void usbredir_handle_destroy(USBDevice *udev) +static void usbredir_unrealize(USBDevice *udev, Error **errp) { USBRedirDevice *dev = USB_REDIRECT(udev); Chardev *chr = qemu_chr_fe_get_driver(&dev->cs); @@ -2513,7 +2513,7 @@ static void usbredir_class_initfn(ObjectClass *klass, void *data) uc->realize = usbredir_realize; uc->product_desc = "USB Redirection Device"; - uc->handle_destroy = usbredir_handle_destroy; + uc->unrealize = usbredir_unrealize; uc->cancel_packet = usbredir_cancel_packet; uc->handle_reset = usbredir_handle_reset; uc->handle_data = usbredir_handle_data; diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index e9b493b939..e995e32dee 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1367,14 +1367,45 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) uint16_t cmd_orig, cmd; Error *err = NULL; + /* This must be an Intel VGA device. */ + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev) || nr != 4) { + return; + } + /* - * This must be an Intel VGA device at address 00:02.0 for us to even - * consider enabling legacy mode. The vBIOS has dependencies on the - * PCI bus address. + * IGD is not a standard, they like to change their specs often. We + * only attempt to support back to SandBridge and we hope that newer + * devices maintain compatibility with generation 8. */ - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || nr != 4 || - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), + gen = igd_gen(vdev); + if (gen != 6 && gen != 8) { + error_report("IGD device %s is unsupported by IGD quirks, " + "try SandyBridge or newer", vdev->vbasedev.name); + return; + } + + /* + * Regardless of running in UPT or legacy mode, the guest graphics + * driver may attempt to use stolen memory, however only legacy mode + * has BIOS support for reserving stolen memory in the guest VM. 
+ * Emulate the GMCH register in all cases and zero out the stolen + * memory size here. Legacy mode may request allocation and re-write + * this below. + */ + gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); + gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8)); + + /* GMCH is read-only, emulated */ + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); + pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); + pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); + + /* + * This must be at address 00:02.0 for us to even consider enabling + * legacy mode. The vBIOS has dependencies on the PCI bus address. + */ + if (&vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), 0, PCI_DEVFN(0x2, 0))) { return; } @@ -1394,18 +1425,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) } /* - * IGD is not a standard, they like to change their specs often. We - * only attempt to support back to SandBridge and we hope that newer - * devices maintain compatibility with generation 8. - */ - gen = igd_gen(vdev); - if (gen != 6 && gen != 8) { - error_report("IGD device %s is unsupported in legacy mode, " - "try SandyBridge or newer", vdev->vbasedev.name); - return; - } - - /* * Most of what we're doing here is to enable the ROM to run, so if * there's no ROM, there's no point in setting up this quirk. * NB. We only seem to get BIOS ROMs, so a UEFI VM would need CSM support. @@ -1460,8 +1479,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) goto out; } - gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4); - /* * If IGD VGA Disable is clear (expected) and VGA is not already enabled, * try to enable it. Probably shouldn't be using legacy mode without VGA, @@ -1532,12 +1549,11 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * when IVD (IGD VGA Disable) is clear, but the claim is that it's unused, * so let's not waste VM memory for it. */ - gmch &= ~((gen < 8 ? 0x1f : 0xff) << (gen < 8 ? 3 : 8)); - if (vdev->igd_gms) { if (vdev->igd_gms <= 0x10) { gms_mb = vdev->igd_gms * 32; gmch |= vdev->igd_gms << (gen < 8 ? 3 : 8); + pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); } else { error_report("Unsupported IGD GMS value 0x%x", vdev->igd_gms); vdev->igd_gms = 0; @@ -1557,11 +1573,6 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) fw_cfg_add_file(fw_cfg_find(), "etc/igd-bdsm-size", bdsm_size, sizeof(*bdsm_size)); - /* GMCH is read-only, emulated */ - pci_set_long(vdev->pdev.config + IGD_GMCH, gmch); - pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0); - pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0); - /* BDSM is read-write, emulated. The BIOS needs to be able to write it */ pci_set_long(vdev->pdev.config + IGD_BDSM, 0); pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0); diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 332f41d662..03a3d01549 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1880,16 +1880,26 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) /* * Extended capabilities are chained with each pointing to the next, so we * can drop anything other than the head of the chain simply by modifying - * the previous next pointer. For the head of the chain, we can modify the - * capability ID to something that cannot match a valid capability. ID - * 0 is reserved for this since absence of capabilities is indicated by - * 0 for the ID, version, AND next pointer. 
However, pcie_add_capability() - * uses ID 0 as reserved for list management and will incorrectly match and - * assert if we attempt to pre-load the head of the chain with this ID. - * Use ID 0xFFFF temporarily since it is also seems to be reserved in - * part for identifying absence of capabilities in a root complex register - * block. If the ID still exists after adding capabilities, switch back to - * zero. We'll mark this entire first dword as emulated for this purpose. + * the previous next pointer. Seed the head of the chain here such that + * we can simply skip any capabilities we want to drop below, regardless + * of their position in the chain. If this stub capability still exists + * after we add the capabilities we want to expose, update the capability + * ID to zero. Note that we cannot seed with the capability header being + * zero as this conflicts with definition of an absent capability chain + * and prevents capabilities beyond the head of the list from being added. + * By replacing the dummy capability ID with zero after walking the device + * chain, we also transparently mark extended capabilities as absent if + * no capabilities were added. Note that the PCIe spec defines an absence + * of extended capabilities to be determined by a value of zero for the + * capability ID, version, AND next pointer. A non-zero next pointer + * should be sufficient to indicate additional capabilities are present, + * which will occur if we call pcie_add_capability() below. The entire + * first dword is emulated to support this. + * + * NB. The kernel side does similar masking, so be prepared that our + * view of the device may also contain a capability ID zero in the head + * of the chain. Skip it for the same reason that we cannot seed the + * chain with a zero capability. 
*/ pci_set_long(pdev->config + PCI_CONFIG_SPACE_SIZE, PCI_EXT_CAP(0xFFFF, 0, 0)); @@ -1915,6 +1925,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) PCI_EXT_CAP_NEXT_MASK); switch (cap_id) { + case 0: /* kernel masked capability */ case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); @@ -2506,12 +2517,16 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) static void vfio_req_notifier_handler(void *opaque) { VFIOPCIDevice *vdev = opaque; + Error *err = NULL; if (!event_notifier_test_and_clear(&vdev->req_notifier)) { return; } - qdev_unplug(&vdev->pdev.qdev, NULL); + qdev_unplug(&vdev->pdev.qdev, &err); + if (err) { + error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + } } static void vfio_register_req_notifier(VFIOPCIDevice *vdev) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 5ce42af9d4..b76f3f62a0 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1153,7 +1153,7 @@ static AddressSpace *virtio_pci_get_dma_as(DeviceState *d) VirtIOPCIProxy *proxy = VIRTIO_PCI(d); PCIDevice *dev = &proxy->pci_dev; - return pci_get_address_space(dev); + return pci_device_iommu_address_space(dev); } static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 23483c752f..efce4b343a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -282,12 +282,17 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) caches = atomic_rcu_read(&vq->vring.caches); pa = offsetof(VRingUsed, ring[vq->vring.num]); virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); + address_space_cache_invalidate(&caches->used, pa, sizeof(val)); } void virtio_queue_set_notification(VirtQueue *vq, int enable) { vq->notification = enable; + if (!vq->vring.desc) { + return; + } + rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -1852,7 +1857,10 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) if (k->has_variable_vring_alignment) { qemu_put_be32(f, vdev->vq[i].vring.align); } - /* XXX virtio-1 devices */ + /* + * Save desc now, the rest of the ring addresses are saved in + * subsections for VIRTIO-1 devices. + */ qemu_put_be64(f, vdev->vq[i].vring.desc); qemu_put_be16s(f, &vdev->vq[i].last_avail_idx); if (k->save_queue) { @@ -1993,14 +2001,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) vdev->vq[i].signalled_used_valid = false; vdev->vq[i].notification = true; - if (vdev->vq[i].vring.desc) { - /* XXX virtio-1 devices */ - virtio_queue_update_rings(vdev, i); - } else if (vdev->vq[i].last_avail_idx) { + if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) { error_report("VQ %d address 0x0 " "inconsistent with Host index 0x%x", i, vdev->vq[i].last_avail_idx); - return -1; + return -1; } if (k->load_queue) { ret = k->load_queue(qbus->parent, i, f); @@ -2061,6 +2066,19 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) for (i = 0; i < num; i++) { if (vdev->vq[i].vring.desc) { uint16_t nheads; + + /* + * VIRTIO-1 devices migrate desc, used, and avail ring addresses so + * only the region cache needs to be set up. Legacy devices need + * to calculate used and avail ring addresses based on the desc + * address. 
+ */ + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + virtio_init_region_cache(vdev, i); + } else { + virtio_queue_update_rings(vdev, i); + } + nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { @@ -2291,7 +2309,7 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) VirtQueue *vq = container_of(n, VirtQueue, host_notifier); bool progress; - if (virtio_queue_empty(vq)) { + if (!vq->vring.desc || virtio_queue_empty(vq)) { return false; } diff --git a/include/block/block.h b/include/block/block.h index 4e81f2069b..c7c4a3ac3a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -82,6 +82,7 @@ typedef struct HDGeometry { } HDGeometry; #define BDRV_O_RDWR 0x0002 +#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */ #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ @@ -187,6 +188,42 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; +/* Block node permission constants */ +enum { + /** + * A user that has the "permission" of consistent reads is guaranteed that + * their view of the contents of the block device is complete and + * self-consistent, representing the contents of a disk at a specific + * point. + * + * For most block devices (including their backing files) this is true, but + * the property cannot be maintained in a few situations like for + * intermediate nodes of a commit block job. + */ + BLK_PERM_CONSISTENT_READ = 0x01, + + /** This permission is required to change the visible disk contents. */ + BLK_PERM_WRITE = 0x02, + + /** + * This permission (which is weaker than BLK_PERM_WRITE) is both enough and + * required for writes to the block node when the caller promises that + * the visible disk content doesn't change. + */ + BLK_PERM_WRITE_UNCHANGED = 0x04, + + /** This permission is required to change the size of a block node. */ + BLK_PERM_RESIZE = 0x08, + + /** + * This permission is required to change the node that this BdrvChild + * points to. 
+ */ + BLK_PERM_GRAPH_MOD = 0x10, + + BLK_PERM_ALL = 0x1f, +}; + /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -199,7 +236,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new(void); -void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp); void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new); @@ -210,11 +248,14 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); -void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); +BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name, + int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, int flags); @@ -253,7 +294,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); int bdrv_get_backing_file_depth(BlockDriverState *bs); void bdrv_refresh_filename(BlockDriverState *bs); -int bdrv_truncate(BlockDriverState *bs, int64_t offset); +int bdrv_truncate(BdrvChild *child, int64_t offset); int64_t bdrv_nb_sectors(BlockDriverState *bs); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); @@ -482,7 +523,8 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, const char *child_name, - const BdrvChildRole *child_role); + const BdrvChildRole *child_role, + Error **errp); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); diff --git a/include/block/block_int.h b/include/block/block_int.h index 1670941da9..a57c0bfb55 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -320,6 +320,59 @@ struct BlockDriver { void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child, Error **errp); + /** + * Informs the block driver that a permission change is intended. The + * driver checks whether the change is permissible and may take other + * preparations for the change (e.g. get file system locks). This operation + * is always followed either by a call to either .bdrv_set_perm or + * .bdrv_abort_perm_update. + * + * Checks whether the requested set of cumulative permissions in @perm + * can be granted for accessing @bs and whether no other users are using + * permissions other than those given in @shared (both arguments take + * BLK_PERM_* bitmasks). + * + * If both conditions are met, 0 is returned. Otherwise, -errno is returned + * and errp is set to an error describing the conflict. 
+ */ + int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm, + uint64_t shared, Error **errp); + + /** + * Called to inform the driver that the set of cumulative set of used + * permissions for @bs has changed to @perm, and the set of sharable + * permission to @shared. The driver can use this to propagate changes to + * its children (i.e. request permissions only if a parent actually needs + * them). + * + * This function is only invoked after bdrv_check_perm(), so block drivers + * may rely on preparations made in their .bdrv_check_perm implementation. + */ + void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared); + + /* + * Called to inform the driver that after a previous bdrv_check_perm() + * call, the permission update is not performed and any preparations made + * for it (e.g. taken file locks) need to be undone. + * + * This function can be called even for nodes that never saw a + * bdrv_check_perm() call. It is a no-op then. + */ + void (*bdrv_abort_perm_update)(BlockDriverState *bs); + + /** + * Returns in @nperm and @nshared the permissions that the driver for @bs + * needs on its child @c, based on the cumulative permissions requested by + * the parents in @parent_perm and @parent_shared. + * + * If @c is NULL, return the permissions for attaching a new child for the + * given @role. + */ + void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t parent_perm, uint64_t parent_shared, + uint64_t *nperm, uint64_t *nshared); + QLIST_ENTRY(BlockDriver) list; }; @@ -388,6 +441,10 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { + /* If true, bdrv_replace_in_backing_chain() doesn't change the node this + * BdrvChild points to. */ + bool stay_at_node; + void (*inherit_options)(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options); @@ -399,6 +456,12 @@ struct BdrvChildRole { * name), or NULL if the parent can't provide a better name. */ const char* (*get_name)(BdrvChild *child); + /* Returns a malloced string that describes the parent of the child for a + * human reader. This could be a node-name, BlockBackend name, qdev ID or + * QOM path of the device owning the BlockBackend, job type and ID etc. The + * caller is responsible for freeing the memory. */ + char* (*get_parent_desc)(BdrvChild *child); + /* * If this pair of functions is implemented, the parent doesn't issue new * requests after returning from .drained_begin() until .drained_end() is @@ -409,16 +472,32 @@ struct BdrvChildRole { */ void (*drained_begin)(BdrvChild *child); void (*drained_end)(BdrvChild *child); + + void (*attach)(BdrvChild *child); + void (*detach)(BdrvChild *child); }; extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; +extern const BdrvChildRole child_backing; struct BdrvChild { BlockDriverState *bs; char *name; const BdrvChildRole *role; void *opaque; + + /** + * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask) + */ + uint64_t perm; + + /** + * Permissions that can still be granted to other users of @bs while this + * BdrvChild is still attached to it. (BLK_PERM_* bitmask) + */ + uint64_t shared_perm; + QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; }; @@ -701,13 +780,16 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. 
* @backing_file_str: String to use as the backing file in @top's overlay + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @top. NULL means + * that a node name should be autogenerated. * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, - Error **errp); + const char *filter_node_name, Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -718,6 +800,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the commit job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -727,8 +812,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, void commit_active_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, int creation_flags, int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp, bool auto_complete); + const char *filter_node_name, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -745,6 +831,9 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @filter_node_name: The node name that should be assigned to the filter + * driver that the mirror job inserts into the graph above @bs. NULL means that + * a node name should be autogenerated. * @errp: Error object. * * Start a mirroring operation on @bs. 
Clusters that are allocated @@ -758,7 +847,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, Error **errp); + bool unmap, const char *filter_node_name, Error **errp); /* * backup_job_create: @@ -796,11 +885,36 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, - void *opaque); + uint64_t perm, uint64_t shared_perm, + void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); +int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); +void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared); +void bdrv_child_abort_perm_update(BdrvChild *c); +int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, + Error **errp); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * block filters: Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED and RESIZE to + * all children */ +void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + +/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by + * (non-raw) image formats: Like above for bs->backing, but for bs->file it + * requires WRITE | RESIZE for read-write images, always requires + * CONSISTENT_READ and doesn't share WRITE. */ +void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared); + const char *bdrv_get_parent_name(const BlockDriverState *bs); -void blk_dev_change_media_cb(BlockBackend *blk, bool load); +void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 1acb256223..9e906f7d7e 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -169,13 +169,25 @@ BlockJob *block_job_get(const char *id); /** * block_job_add_bdrv: * @job: A block job + * @name: The name to assign to the new BdrvChild * @bs: A BlockDriverState that is involved in @job + * @perm, @shared_perm: Permissions to request on the node * * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. */ -void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); +int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp); + +/** + * block_job_remove_all_bdrv: + * @job: The block job + * + * Remove all BlockDriverStates from the list of nodes that are involved in the + * job. This removes the blockers added with block_job_add_bdrv(). + */ +void block_job_remove_all_bdrv(BlockJob *job); /** * block_job_set_speed: diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 82238229c6..3f86cc5acc 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -119,6 +119,7 @@ struct BlockJobDriver { * generated automatically. * @job_type: The class object for the newly-created job. 
* @bs: The block + * @perm, @shared_perm: Permissions to request for @bs * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. @@ -134,7 +135,8 @@ struct BlockJobDriver { * called from a wrapper that is specific to the job type. */ void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, int flags, + BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp); /** diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index bd15853e51..8c305aa4fa 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -64,6 +64,7 @@ void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); size_t qemu_ram_pagesize(RAMBlock *block); +size_t qemu_ram_pagesize_largest(void); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); @@ -105,6 +106,7 @@ typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr, ram_addr_t offset, ram_addr_t length, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); #endif diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index d454c005b7..3f941783c5 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -23,8 +23,6 @@ /* cputlb.c */ void tlb_protect_code(ram_addr_t ram_addr); void tlb_unprotect_code(ram_addr_t ram_addr); -void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, - uintptr_t length); extern int tlb_flush_count; #endif diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 21ab7bf3fd..bcde1e6a14 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -93,6 +93,27 @@ void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx); */ void tlb_flush_page(CPUState *cpu, target_ulong addr); /** + * tlb_flush_page_all_cpus: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all + * MMU indexes. + */ +void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr); +/** + * tlb_flush_page_all_cpus_synced: + * @cpu: src CPU of the flush + * @addr: virtual address of page to be flushed + * + * Flush one page from the TLB of the specified CPU, for all MMU + * indexes like tlb_flush_page_all_cpus except the source vCPUs work + * is scheduled as safe work meaning all flushes will be complete once + * the source vCPUs safe work is complete. This will depend on when + * the guests translation ends the TB. + */ +void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr); +/** * tlb_flush: * @cpu: CPU whose TLB should be flushed * @@ -103,24 +124,87 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr); */ void tlb_flush(CPUState *cpu); /** + * tlb_flush_all_cpus: + * @cpu: src CPU of the flush + */ +void tlb_flush_all_cpus(CPUState *src_cpu); +/** + * tlb_flush_all_cpus_synced: + * @cpu: src CPU of the flush + * + * Like tlb_flush_all_cpus except this except the source vCPUs work is + * scheduled as safe work meaning all flushes will be complete once + * the source vCPUs safe work is complete. This will depend on when + * the guests translation ends the TB. 
+ */ +void tlb_flush_all_cpus_synced(CPUState *src_cpu); +/** * tlb_flush_page_by_mmuidx: * @cpu: CPU whose TLB should be flushed * @addr: virtual address of page to be flushed - * @...: list of MMU indexes to flush, terminated by a negative value + * @idxmap: bitmap of MMU indexes to flush * * Flush one page from the TLB of the specified CPU, for the specified * MMU indexes. */ -void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...); +void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_page_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, target_ulong addr, + uint16_t idxmap); +/** + * tlb_flush_page_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @addr: virtual address of page to be flushed + * @idxmap: bitmap of MMU indexes to flush + * + * Flush one page from the TLB of all CPUs, for the specified MMU + * indexes like tlb_flush_page_by_mmuidx_all_cpus except the source + * vCPUs work is scheduled as safe work meaning all flushes will be + * complete once the source vCPUs safe work is complete. This will + * depend on when the guests translation ends the TB. + */ +void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, target_ulong addr, + uint16_t idxmap); /** * tlb_flush_by_mmuidx: * @cpu: CPU whose TLB should be flushed - * @...: list of MMU indexes to flush, terminated by a negative value + * @wait: If true ensure synchronisation by exiting the cpu_loop + * @idxmap: bitmap of MMU indexes to flush * * Flush all entries from the TLB of the specified CPU, for the specified * MMU indexes. */ -void tlb_flush_by_mmuidx(CPUState *cpu, ...); +void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap); +/** + * tlb_flush_by_mmuidx_all_cpus: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes. + */ +void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap); +/** + * tlb_flush_by_mmuidx_all_cpus_synced: + * @cpu: Originating CPU of the flush + * @idxmap: bitmap of MMU indexes to flush + * + * Flush all entries from all TLBs of all CPUs, for the specified + * MMU indexes like tlb_flush_by_mmuidx_all_cpus except except the source + * vCPUs work is scheduled as safe work meaning all flushes will be + * complete once the source vCPUs safe work is complete. This will + * depend on when the guests translation ends the TB. + */ +void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, uint16_t idxmap); /** * tlb_set_page_with_attrs: * @cpu: CPU to add this TLB entry for @@ -162,17 +246,45 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) { } - +static inline void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr) +{ +} +static inline void tlb_flush_page_all_cpus_synced(CPUState *src, + target_ulong addr) +{ +} static inline void tlb_flush(CPUState *cpu) { } - +static inline void tlb_flush_all_cpus(CPUState *src_cpu) +{ +} +static inline void tlb_flush_all_cpus_synced(CPUState *src_cpu) +{ +} static inline void tlb_flush_page_by_mmuidx(CPUState *cpu, - target_ulong addr, ...) 
+ target_ulong addr, uint16_t idxmap) { } -static inline void tlb_flush_by_mmuidx(CPUState *cpu, ...) +static inline void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap) +{ +} +static inline void tlb_flush_page_by_mmuidx_all_cpus(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *cpu, + target_ulong addr, + uint16_t idxmap) +{ +} +static inline void tlb_flush_by_mmuidx_all_cpus(CPUState *cpu, uint16_t idxmap) +{ +} +static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu, + uint16_t idxmap) { } #endif @@ -404,8 +516,4 @@ bool memory_region_is_unassigned(MemoryRegion *mr); /* vl.c */ extern int singlestep; -/* cpu-exec.c, accessed with atomic_mb_read/atomic_mb_set */ -extern CPUState *tcg_current_cpu; -extern bool exit_request; - #endif diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 842ec6b22a..f1288efa87 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -180,6 +180,8 @@ enum { float_round_up = 2, float_round_to_zero = 3, float_round_ties_away = 4, + /* Not an IEEE rounding mode: round to the closest odd mantissa value */ + float_round_to_odd = 5, }; /*---------------------------------------------------------------------------- @@ -712,6 +714,9 @@ int32_t float128_to_int32(float128, float_status *status); int32_t float128_to_int32_round_to_zero(float128, float_status *status); int64_t float128_to_int64(float128, float_status *status); int64_t float128_to_int64_round_to_zero(float128, float_status *status); +uint64_t float128_to_uint64(float128, float_status *status); +uint64_t float128_to_uint64_round_to_zero(float128, float_status *status); +uint32_t float128_to_uint32_round_to_zero(float128, float_status *status); float32 float128_to_float32(float128, float_status *status); float64 float128_to_float64(float128, float_status *status); floatx80 float128_to_floatx80(float128, float_status *status); diff --git a/include/glib-compat.h b/include/glib-compat.h index 0cd24ffbe9..863c8cf73d 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -328,4 +328,25 @@ static inline void g_source_set_name_by_id(guint tag, const char *name) #define g_test_subprocess() (0) #endif + +#if !GLIB_CHECK_VERSION(2, 34, 0) +static inline void +g_test_add_data_func_full(const char *path, + gpointer data, + gpointer fn, + gpointer data_free_func) +{ +#if GLIB_CHECK_VERSION(2, 26, 0) + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func); +#else + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (void (*)(void)) fn, (void (*)(void)) data_free_func); +#endif +} +#endif + + #endif diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index 71d3c48e7d..3c2e4e95a5 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -11,6 +11,7 @@ typedef enum { ACPI_CPU_HOTPLUG_STATUS = 4, ACPI_MEMORY_HOTPLUG_STATUS = 8, ACPI_NVDIMM_HOTPLUG_STATUS = 16, + ACPI_VMGENID_CHANGE_STATUS = 32, } AcpiEventStatusBits; #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 559326cbd5..00c21f160c 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -210,6 +210,7 @@ struct AcpiBuildTables { GArray *table_data; GArray *rsdp; GArray *tcpalog; 
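The change from a negative-terminated vararg list to a `uint16_t idxmap` bitmap in the tlb_flush_*_by_mmuidx interfaces above means every target call site gets rewritten. A minimal sketch of a converted caller, using the ARM MMU-index enumerators purely to make the bitmap concrete (not quoted from any particular target):

```c
#include "qemu/osdep.h"
#include "cpu.h"                /* per-target cpu.h, e.g. target/arm/cpu.h */
#include "exec/exec-all.h"

/* Hypothetical target helper showing the new bitmap-based interface. */
static void example_flush_stage1_page(CPUState *cs, target_ulong addr)
{
    /* Old style (removed):
     *   tlb_flush_page_by_mmuidx(cs, addr,
     *                            ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1);
     * New style: the same MMU indexes encoded as bits of a uint16_t.
     */
    uint16_t idxmap = (1 << ARMMMUIdx_S12NSE1) | (1 << ARMMMUIdx_S12NSE0);

    tlb_flush_page_by_mmuidx(cs, addr, idxmap);

    /* A broadcast ("inner shareable") operation would instead call
     * tlb_flush_page_by_mmuidx_all_cpus_synced(cs, addr, idxmap); the
     * flush on the issuing vCPU then completes when its current TB ends.
     */
}
```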
+ GArray *vmgenid; BIOSLinker *linker; } AcpiBuildTables; diff --git a/include/hw/acpi/bios-linker-loader.h b/include/hw/acpi/bios-linker-loader.h index fa1e5d1a4e..efe17b0b9c 100644 --- a/include/hw/acpi/bios-linker-loader.h +++ b/include/hw/acpi/bios-linker-loader.h @@ -26,5 +26,12 @@ void bios_linker_loader_add_pointer(BIOSLinker *linker, const char *src_file, uint32_t src_offset); +void bios_linker_loader_write_pointer(BIOSLinker *linker, + const char *dest_file, + uint32_t dst_patched_offset, + uint8_t dst_patched_size, + const char *src_file, + uint32_t src_offset); + void bios_linker_loader_cleanup(BIOSLinker *linker); #endif diff --git a/include/hw/acpi/vmgenid.h b/include/hw/acpi/vmgenid.h new file mode 100644 index 0000000000..db7fa0e633 --- /dev/null +++ b/include/hw/acpi/vmgenid.h @@ -0,0 +1,35 @@ +#ifndef ACPI_VMGENID_H +#define ACPI_VMGENID_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev.h" +#include "qemu/uuid.h" + +#define VMGENID_DEVICE "vmgenid" +#define VMGENID_GUID "guid" +#define VMGENID_GUID_FW_CFG_FILE "etc/vmgenid_guid" +#define VMGENID_ADDR_FW_CFG_FILE "etc/vmgenid_addr" + +#define VMGENID_FW_CFG_SIZE 4096 /* Occupy a page of memory */ +#define VMGENID_GUID_OFFSET 40 /* allow space for + * OVMF SDT Header Probe Supressor + */ + +#define VMGENID(obj) OBJECT_CHECK(VmGenIdState, (obj), VMGENID_DEVICE) + +typedef struct VmGenIdState { + DeviceClass parent_obj; + QemuUUID guid; /* The 128-bit GUID seen by the guest */ + uint8_t vmgenid_addr_le[8]; /* Address of the GUID (little-endian) */ +} VmGenIdState; + +static inline Object *find_vmgenid_dev(void) +{ + return object_resolve_path_type("", VMGENID_DEVICE, NULL); +} + +void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid, + BIOSLinker *linker); +void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid); + +#endif diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index c175c0e999..a3f79d3379 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -26,6 +26,18 @@ typedef enum { /* armv7m.c */ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq, const char *kernel_filename, const char *cpu_model); +/** + * armv7m_load_kernel: + * @cpu: CPU + * @kernel_filename: file to load + * @mem_size: mem_size: maximum image size to load + * + * Load the guest image for an ARMv7M system. This must be called by + * any ARMv7M board, either directly or via armv7m_init(). (This is + * necessary to ensure that the CPU resets correctly on system reset, + * as well as for kernel loading.) + */ +void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size); /* * struct used as a parameter of the arm_load_kernel machine init diff --git a/include/hw/arm/armv7m.h b/include/hw/arm/armv7m.h new file mode 100644 index 0000000000..a9b3f2ab35 --- /dev/null +++ b/include/hw/arm/armv7m.h @@ -0,0 +1,63 @@ +/* + * ARMv7M CPU object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. 
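As a rough sketch of how a board's ACPI build path might consume the vmgenid helpers declared above (this mirrors the intended usage rather than quoting the actual hw/i386 code; the function and argument names are illustrative):

```c
#include "qemu/osdep.h"
#include "hw/acpi/vmgenid.h"

/* Build the VM Generation ID table only when a vmgenid device exists,
 * e.g. one created with "-device vmgenid,guid=<UUID>" on the command line. */
static void example_acpi_add_vmgenid(GArray *table_data, GArray *guid_blob,
                                     BIOSLinker *linker)
{
    Object *obj = find_vmgenid_dev();

    if (obj) {
        VmGenIdState *vms = VMGENID(obj);

        vmgenid_build_acpi(vms, table_data, guid_blob, linker);
    }
}
```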
+ */ + +#ifndef HW_ARM_ARMV7M_H +#define HW_ARM_ARMV7M_H + +#include "hw/sysbus.h" +#include "hw/arm/armv7m_nvic.h" + +#define TYPE_BITBAND "ARM,bitband-memory" +#define BITBAND(obj) OBJECT_CHECK(BitBandState, (obj), TYPE_BITBAND) + +typedef struct { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + AddressSpace *source_as; + MemoryRegion iomem; + uint32_t base; + MemoryRegion *source_memory; +} BitBandState; + +#define TYPE_ARMV7M "armv7m" +#define ARMV7M(obj) OBJECT_CHECK(ARMv7MState, (obj), TYPE_ARMV7M) + +#define ARMV7M_NUM_BITBANDS 2 + +/* ARMv7M container object. + * + Unnamed GPIO input lines: external IRQ lines for the NVIC + * + Named GPIO output SYSRESETREQ: signalled for guest AIRCR.SYSRESETREQ + * + Property "cpu-model": CPU model to instantiate + * + Property "num-irq": number of external IRQ lines + * + Property "memory": MemoryRegion defining the physical address space + * that CPU accesses see. (The NVIC, bitbanding and other CPU-internal + * devices will be automatically layered on top of this view.) + */ +typedef struct ARMv7MState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + NVICState nvic; + BitBandState bitband[ARMV7M_NUM_BITBANDS]; + ARMCPU *cpu; + + /* MemoryRegion we pass to the CPU, with our devices layered on + * top of the ones the board provides in board_memory. + */ + MemoryRegion container; + + /* Properties */ + char *cpu_model; + /* MemoryRegion the board provides to us (with its devices, RAM, etc) */ + MemoryRegion *board_memory; +} ARMv7MState; + +#endif diff --git a/include/hw/arm/armv7m_nvic.h b/include/hw/arm/armv7m_nvic.h new file mode 100644 index 0000000000..1d145fb75f --- /dev/null +++ b/include/hw/arm/armv7m_nvic.h @@ -0,0 +1,62 @@ +/* + * ARMv7M NVIC object + * + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell <peter.maydell@linaro.org> + * + * This code is licensed under the GPL version 2 or later. + */ + +#ifndef HW_ARM_ARMV7M_NVIC_H +#define HW_ARM_ARMV7M_NVIC_H + +#include "target/arm/cpu.h" +#include "hw/sysbus.h" +#include "hw/timer/armv7m_systick.h" + +#define TYPE_NVIC "armv7m_nvic" + +#define NVIC(obj) \ + OBJECT_CHECK(NVICState, (obj), TYPE_NVIC) + +/* Highest permitted number of exceptions (architectural limit) */ +#define NVIC_MAX_VECTORS 512 + +typedef struct VecInfo { + /* Exception priorities can range from -3 to 255; only the unmodifiable + * priority values for RESET, NMI and HardFault can be negative. + */ + int16_t prio; + uint8_t enabled; + uint8_t pending; + uint8_t active; + uint8_t level; /* exceptions <=15 never set level */ +} VecInfo; + +typedef struct NVICState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + ARMCPU *cpu; + + VecInfo vectors[NVIC_MAX_VECTORS]; + uint32_t prigroup; + + /* vectpending and exception_prio are both cached state that can + * be recalculated from the vectors[] array and the prigroup field. 
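The property names documented in the ARMv7MState comment block above suggest how a Cortex-M board instantiates the new container object. A hedged sketch (the helper name, IRQ count and CPU model are placeholders, and error handling is minimal):

```c
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/arm/armv7m.h"
#include "hw/qdev-properties.h"

static DeviceState *example_create_armv7m(MemoryRegion *sysmem,
                                          int num_irq,
                                          const char *cpu_model)
{
    DeviceState *armv7m = qdev_create(NULL, TYPE_ARMV7M);

    qdev_prop_set_uint32(armv7m, "num-irq", num_irq);
    qdev_prop_set_string(armv7m, "cpu-model", cpu_model);
    /* The "memory" link is the board's view of the physical address space;
     * the NVIC and bitband regions are layered on top of it internally. */
    object_property_set_link(OBJECT(armv7m), OBJECT(sysmem), "memory",
                             &error_abort);
    qdev_init_nofail(armv7m);

    return armv7m;
}
```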
+ */ + unsigned int vectpending; /* highest prio pending enabled exception */ + int exception_prio; /* group prio of the highest prio active exception */ + + MemoryRegion sysregmem; + MemoryRegion container; + + uint32_t num_irq; + qemu_irq excpout; + qemu_irq sysresetreq; + + SysTickState systick; +} NVICState; + +#endif diff --git a/include/hw/arm/bcm2835_peripherals.h b/include/hw/arm/bcm2835_peripherals.h index e12ae3721a..122b286de7 100644 --- a/include/hw/arm/bcm2835_peripherals.h +++ b/include/hw/arm/bcm2835_peripherals.h @@ -19,8 +19,11 @@ #include "hw/dma/bcm2835_dma.h" #include "hw/intc/bcm2835_ic.h" #include "hw/misc/bcm2835_property.h" +#include "hw/misc/bcm2835_rng.h" #include "hw/misc/bcm2835_mbox.h" #include "hw/sd/sdhci.h" +#include "hw/sd/bcm2835_sdhost.h" +#include "hw/gpio/bcm2835_gpio.h" #define TYPE_BCM2835_PERIPHERALS "bcm2835-peripherals" #define BCM2835_PERIPHERALS(obj) \ @@ -41,8 +44,11 @@ typedef struct BCM2835PeripheralState { BCM2835DMAState dma; BCM2835ICState ic; BCM2835PropertyState property; + BCM2835RngState rng; BCM2835MboxState mboxes; SDHCIState sdhci; + BCM2835SDHostState sdhost; + BCM2835GpioState gpio; } BCM2835PeripheralState; #endif /* BCM2835_PERIPHERALS_H */ diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 133214195b..e2dce1122e 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -31,6 +31,7 @@ #include "hw/adc/stm32f2xx_adc.h" #include "hw/or-irq.h" #include "hw/ssi/stm32f2xx_spi.h" +#include "hw/arm/armv7m.h" #define TYPE_STM32F205_SOC "stm32f205-soc" #define STM32F205_SOC(obj) \ @@ -51,9 +52,10 @@ typedef struct STM32F205State { SysBusDevice parent_obj; /*< public >*/ - char *kernel_filename; char *cpu_model; + ARMv7MState armv7m; + STM32F2XXSyscfgState syscfg; STM32F2XXUsartState usart[STM_NUM_USARTS]; STM32F2XXTimerState timer[STM_NUM_TIMERS]; diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 58ce74e0e5..33b0ff3892 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -93,6 +93,7 @@ typedef struct { FWCfgState *fw_cfg; bool secure; bool highmem; + bool its; bool virt; int32_t gic_version; struct arm_boot_info bootinfo; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index df9d207d81..f3f6e8ef02 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -26,6 +26,7 @@ typedef struct BlockConf { /* geometry, not all devices use this */ uint32_t cyls, heads, secs; OnOffAuto wce; + bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; } BlockConf; @@ -53,7 +54,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ DEFINE_PROP_UINT32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ - DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, ON_OFF_AUTO_AUTO) + DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ + ON_OFF_AUTO_AUTO), \ + DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ @@ -73,7 +76,8 @@ void blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); void blkconf_blocksizes(BlockConf *conf); -void blkconf_apply_backend_options(BlockConf *conf); +void blkconf_apply_backend_options(BlockConf *conf, bool readonly, + bool resizable, Error **errp); /* Hard disk geometry */ diff --git a/include/hw/boards.h b/include/hw/boards.h 
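The blkconf_apply_backend_options() change above moves error reporting into an Error ** parameter and makes the device state its read-only and resizable expectations explicitly. A sketch of a block device realize path under the new signature (the resizable=false choice is only an example):

```c
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/block/block.h"

static void example_blk_device_realize(BlockConf *conf, bool read_only,
                                       Error **errp)
{
    Error *local_err = NULL;

    blkconf_blocksizes(conf);
    blkconf_apply_backend_options(conf, read_only, false /* resizable */,
                                  &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
    /* ... continue with device-specific setup ... */
}
```

The new share-rw property added to DEFINE_BLOCK_PROPERTIES above is what lets a device opt into sharing write access to the same image with other users.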
index ac891a828b..269d0ba399 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -41,15 +41,20 @@ int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); void machine_register_compat_props(MachineState *machine); +HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); /** * CPUArchId: * @arch_id - architecture-dependent CPU ID of present or possible CPU * @cpu - pointer to corresponding CPU object if it's present on NULL otherwise + * @props - CPU object properties, initialized by board + * #vcpus_count - number of threads provided by @cpu object */ typedef struct { uint64_t arch_id; - struct CPUState *cpu; + int64_t vcpus_count; + CpuInstanceProperties props; + Object *cpu; } CPUArchId; /** @@ -82,10 +87,8 @@ typedef struct { * Returns an array of @CPUArchId architecture-dependent CPU IDs * which includes CPU IDs for present and possible to hotplug CPUs. * Caller is responsible for freeing returned list. - * @query_hotpluggable_cpus: - * Returns a @HotpluggableCPUList, which describes CPUs objects which - * could be added with -device/device_add. - * Caller is responsible for freeing returned list. + * @has_hotpluggable_cpus: + * If true, board supports CPUs creation with -device/device_add. * @minimum_page_bits: * If non-zero, the board promises never to create a CPU with a page size * smaller than this, so QEMU can use a more efficient larger page @@ -131,12 +134,12 @@ struct MachineClass { bool option_rom_has_mr; bool rom_file_has_mr; int minimum_page_bits; + bool has_hotpluggable_cpus; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); unsigned (*cpu_index_to_socket_id)(unsigned cpu_index); const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine); - HotpluggableCPUList *(*query_hotpluggable_cpus)(MachineState *machine); }; /** @@ -178,6 +181,7 @@ struct MachineState { char *initrd_filename; const char *cpu_model; AccelState *accelerator; + CPUArchIdList *possible_cpus; }; #define DEFINE_MACHINE(namestr, machine_initfn) \ diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index 25659b93be..a172a6068a 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -264,7 +264,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int elf_machine, int clear_lsb, int data_swab, - AddressSpace *as) + AddressSpace *as, bool load_rom) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -403,10 +403,15 @@ static int glue(load_elf, SZ)(const char *name, int fd, *pentry = ehdr.e_entry - ph->p_vaddr + ph->p_paddr; } - snprintf(label, sizeof(label), "phdr #%d: %s", i, name); + if (load_rom) { + snprintf(label, sizeof(label), "phdr #%d: %s", i, name); - /* rom_add_elf_program() seize the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr, as); + /* rom_add_elf_program() seize the ownership of 'data' */ + rom_add_elf_program(label, data, file_size, mem_size, addr, as); + } else { + cpu_physical_memory_write(addr, data, file_size); + g_free(data); + } total_size += mem_size; if (addr < low) diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h new file mode 100644 index 0000000000..9f8e0c720c --- /dev/null +++ b/include/hw/gpio/bcm2835_gpio.h @@ -0,0 +1,39 @@ +/* + * Raspberry Pi (BCM2835) GPIO Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps 
<clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef BCM2835_GPIO_H +#define BCM2835_GPIO_H + +#include "hw/sd/sd.h" + +typedef struct BCM2835GpioState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + + /* SDBus selector */ + SDBus sdbus; + SDBus *sdbus_sdhci; + SDBus *sdbus_sdhost; + + uint8_t fsel[54]; + uint32_t lev0, lev1; + uint8_t sd_fsel; + qemu_irq out[54]; +} BCM2835GpioState; + +#define TYPE_BCM2835_GPIO "bcm2835_gpio" +#define BCM2835_GPIO(obj) \ + OBJECT_CHECK(BCM2835GpioState, (obj), TYPE_BCM2835_GPIO) + +#endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 079e8d9393..d1f45540a1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -73,7 +73,6 @@ struct PCMachineState { /* CPU and apic information: */ bool apic_xrupt_override; unsigned apic_id_limit; - CPUArchIdList *possible_cpus; uint16_t boot_cpus; /* NUMA information: */ diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h index 4156051d98..bccdfe17c6 100644 --- a/include/hw/intc/arm_gicv3_common.h +++ b/include/hw/intc/arm_gicv3_common.h @@ -172,6 +172,7 @@ struct GICv3CPUState { uint8_t gicr_ipriorityr[GIC_INTERNAL]; /* CPU interface */ + uint64_t icc_sre_el1; uint64_t icc_ctlr_el1[2]; uint64_t icc_pmr_el1; uint64_t icc_bpr[3]; diff --git a/include/hw/loader-fit.h b/include/hw/loader-fit.h new file mode 100644 index 0000000000..9e2a068a20 --- /dev/null +++ b/include/hw/loader-fit.h @@ -0,0 +1,41 @@ +/* + * Flattened Image Tree loader. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
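Taken together with the hw/i386/pc.h hunk above (which drops the PC-local possible_cpus field), the boards.h rework centralises the possible-CPU list in MachineState. A sketch of how a board's possible_cpu_arch_ids() hook might populate the new CPUArchId fields, with a made-up one-thread-per-core topology:

```c
#include "qemu/osdep.h"
#include "hw/boards.h"
#include "sysemu/sysemu.h"      /* max_cpus */

static const CPUArchIdList *example_possible_cpu_arch_ids(MachineState *ms)
{
    int i;

    if (ms->possible_cpus) {
        return ms->possible_cpus;
    }

    ms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
                                  sizeof(CPUArchId) * max_cpus);
    ms->possible_cpus->len = max_cpus;
    for (i = 0; i < ms->possible_cpus->len; i++) {
        ms->possible_cpus->cpus[i].arch_id = i;
        ms->possible_cpus->cpus[i].vcpus_count = 1;
        ms->possible_cpus->cpus[i].props.has_core_id = true;
        ms->possible_cpus->cpus[i].props.core_id = i;
    }
    return ms->possible_cpus;
}
```

With has_hotpluggable_cpus set in the MachineClass, the generic machine_query_hotpluggable_cpus() can then answer query-hotpluggable-cpus from this list instead of each board providing its own hook.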
+ */ + +#ifndef HW_LOADER_FIT_H +#define HW_LOADER_FIT_H + +#include <exec/hwaddr.h> + +struct fit_loader_match { + const char *compatible; + const void *data; +}; + +struct fit_loader { + const struct fit_loader_match *matches; + hwaddr (*addr_to_phys)(void *opaque, uint64_t addr); + const void *(*fdt_filter)(void *opaque, const void *fdt, + const void *match_data, hwaddr *load_addr); + const void *(*kernel_filter)(void *opaque, const void *kernel, + hwaddr *load_addr, hwaddr *entry_addr); +}; + +int load_fit(const struct fit_loader *ldr, const char *filename, void *opaque); + +#endif /* HW_LOADER_FIT_H */ diff --git a/include/hw/loader.h b/include/hw/loader.h index 0dbd8d6bf3..490c9ff8e6 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -65,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf_as: +/** load_elf_ram: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -81,6 +81,7 @@ const char *load_elf_strerror(int error); * words and 3 for within doublewords. * @as: The AddressSpace to load the ELF to. The value of address_space_memory * is used if nothing is supplied here. + * @load_rom : Load ELF binary as ROM * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -93,6 +94,16 @@ const char *load_elf_strerror(int error); * If @elf_machine is EM_NONE then the machine type will be read from the * ELF header and no checks will be carried out against the machine type. */ +int load_elf_ram(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as, + bool load_rom); + +/** load_elf_as: + * Same as load_elf_ram(), but always loads the elf as ROM + */ int load_elf_as(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, @@ -164,6 +175,8 @@ int load_uimage(const char *filename, hwaddr *ep, */ int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz); +ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, size_t srclen); + ssize_t read_targphys(const char *name, int fd, hwaddr dst_addr, size_t nbytes); void pstrcpy_targphys(const char *name, @@ -214,4 +227,8 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict); int rom_add_vga(const char *file); int rom_add_option(const char *file, int32_t bootindex); +/* This is the usual maximum in uboot, so if a uImage overflows this, it would + * overflow on real hardware too. */ +#define UBOOT_MAX_GUNZIP_BYTES (64 << 20) + #endif diff --git a/include/hw/misc/bcm2835_rng.h b/include/hw/misc/bcm2835_rng.h new file mode 100644 index 0000000000..41a531bce7 --- /dev/null +++ b/include/hw/misc/bcm2835_rng.h @@ -0,0 +1,27 @@ +/* + * BCM2835 Random Number Generator emulation + * + * Copyright (C) 2017 Marcin Chojnacki <marcinch7@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
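A sketch of the difference between the two ELF loaders documented above: passing load_rom=false writes the segments straight into guest RAM instead of registering them as ROM blobs that are re-installed on reset. The surrounding board code and the loader arguments chosen here are assumptions:

```c
#include "qemu/osdep.h"
#include "hw/loader.h"
#include "elf.h"

static int example_load_kernel(const char *filename, uint64_t *entry)
{
    uint64_t lowaddr, highaddr;

    /* load_elf_as(...) behaves like load_elf_ram(..., load_rom=true). */
    return load_elf_ram(filename, NULL, NULL, entry, &lowaddr, &highaddr,
                        0 /* big_endian */, EM_NONE, 0 /* clear_lsb */,
                        0 /* data_swab */, NULL /* default address space */,
                        false /* load_rom */);
}
```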
+ */ + +#ifndef BCM2835_RNG_H +#define BCM2835_RNG_H + +#include "hw/sysbus.h" + +#define TYPE_BCM2835_RNG "bcm2835-rng" +#define BCM2835_RNG(obj) \ + OBJECT_CHECK(BCM2835RngState, (obj), TYPE_BCM2835_RNG) + +typedef struct { + SysBusDevice busdev; + MemoryRegion iomem; + + uint32_t rng_ctrl; + uint32_t rng_status; +} BCM2835RngState; + +#endif diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h index a209d91ded..c9dfcb4b84 100644 --- a/include/hw/misc/mips_cmgcr.h +++ b/include/hw/misc/mips_cmgcr.h @@ -41,6 +41,9 @@ #define GCR_L2_CONFIG_BYPASS_SHF 20 #define GCR_L2_CONFIG_BYPASS_MSK ((0x1ULL) << GCR_L2_CONFIG_BYPASS_SHF) +/* GCR_BASE register fields */ +#define GCR_BASE_GCRBASE_MSK 0xffffffff8000ULL + /* GCR_GIC_BASE register fields */ #define GCR_GIC_BASE_GICEN_MSK 1 #define GCR_GIC_BASE_GICBASE_MSK 0xFFFFFFFE0000ULL diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 092294ed5a..dfa76143f3 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -106,7 +106,7 @@ static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(spapr->xics, phb->lsi_table[pin].irq); + return xics_get_qirq(XICS_FABRIC(spapr), phb->lsi_table[pin].irq); } PCIHostState *spapr_create_phb(sPAPRMachineState *spapr, int index); diff --git a/include/hw/pci-host/xilinx-pcie.h b/include/hw/pci-host/xilinx-pcie.h new file mode 100644 index 0000000000..bec66b27c5 --- /dev/null +++ b/include/hw/pci-host/xilinx-pcie.h @@ -0,0 +1,68 @@ +/* + * Xilinx PCIe host controller emulation. + * + * Copyright (c) 2016 Imagination Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef HW_XILINX_PCIE_H +#define HW_XILINX_PCIE_H + +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pcie_host.h" + +#define TYPE_XILINX_PCIE_HOST "xilinx-pcie-host" +#define XILINX_PCIE_HOST(obj) \ + OBJECT_CHECK(XilinxPCIEHost, (obj), TYPE_XILINX_PCIE_HOST) + +#define TYPE_XILINX_PCIE_ROOT "xilinx-pcie-root" +#define XILINX_PCIE_ROOT(obj) \ + OBJECT_CHECK(XilinxPCIERoot, (obj), TYPE_XILINX_PCIE_ROOT) + +typedef struct XilinxPCIERoot { + PCIBridge parent_obj; +} XilinxPCIERoot; + +typedef struct XilinxPCIEInt { + uint32_t fifo_reg1; + uint32_t fifo_reg2; +} XilinxPCIEInt; + +typedef struct XilinxPCIEHost { + PCIExpressHost parent_obj; + + char name[16]; + + uint32_t bus_nr; + uint64_t cfg_base, cfg_size; + uint64_t mmio_base, mmio_size; + bool link_up; + qemu_irq irq; + + MemoryRegion mmio, io; + + XilinxPCIERoot root; + + uint32_t intr; + uint32_t intr_mask; + XilinxPCIEInt intr_fifo[16]; + unsigned int intr_fifo_r, intr_fifo_w; + uint32_t rpscr; +} XilinxPCIEHost; + +#endif /* HW_XILINX_PCIE_H */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6983f13745..9349acbfb2 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -429,6 +429,10 @@ int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), void *opaque); +void pci_for_each_device_reverse(PCIBus *bus, int bus_num, + void (*fn)(PCIBus *bus, PCIDevice *d, + void *opaque), + void *opaque); void pci_for_each_bus_depth_first(PCIBus *bus, void *(*begin)(PCIBus *bus, void *parent_state), void (*end)(PCIBus *bus, void *state), diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index d77ca60a0e..d22ad8dd3b 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -13,41 +13,84 @@ /* Device classes and subclasses */ -#define PCI_BASE_CLASS_STORAGE 0x01 -#define PCI_BASE_CLASS_NETWORK 0x02 +#define PCI_CLASS_NOT_DEFINED 0x0000 +#define PCI_CLASS_NOT_DEFINED_VGA 0x0001 +#define PCI_BASE_CLASS_STORAGE 0x01 #define PCI_CLASS_STORAGE_SCSI 0x0100 #define PCI_CLASS_STORAGE_IDE 0x0101 +#define PCI_CLASS_STORAGE_FLOPPY 0x0102 +#define PCI_CLASS_STORAGE_IPI 0x0103 #define PCI_CLASS_STORAGE_RAID 0x0104 +#define PCI_CLASS_STORAGE_ATA 0x0105 #define PCI_CLASS_STORAGE_SATA 0x0106 +#define PCI_CLASS_STORAGE_SAS 0x0107 #define PCI_CLASS_STORAGE_EXPRESS 0x0108 #define PCI_CLASS_STORAGE_OTHER 0x0180 +#define PCI_BASE_CLASS_NETWORK 0x02 #define PCI_CLASS_NETWORK_ETHERNET 0x0200 +#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201 +#define PCI_CLASS_NETWORK_FDDI 0x0202 +#define PCI_CLASS_NETWORK_ATM 0x0203 +#define PCI_CLASS_NETWORK_ISDN 0x0204 +#define PCI_CLASS_NETWORK_WORLDFIP 0x0205 +#define PCI_CLASS_NETWORK_PICMG214 0x0206 #define PCI_CLASS_NETWORK_OTHER 0x0280 +#define PCI_BASE_CLASS_DISPLAY 0x03 #define PCI_CLASS_DISPLAY_VGA 0x0300 +#define PCI_CLASS_DISPLAY_XGA 0x0301 +#define PCI_CLASS_DISPLAY_3D 0x0302 #define PCI_CLASS_DISPLAY_OTHER 0x0380 +#define PCI_BASE_CLASS_MULTIMEDIA 0x04 +#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400 #define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401 +#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402 +#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480 +#define PCI_BASE_CLASS_MEMORY 0x05 #define PCI_CLASS_MEMORY_RAM 0x0500 +#define PCI_CLASS_MEMORY_FLASH 0x0501 +#define PCI_CLASS_MEMORY_OTHER 0x0580 -#define PCI_CLASS_SYSTEM_SDHCI 0x0805 -#define PCI_CLASS_SYSTEM_OTHER 0x0880 - -#define PCI_CLASS_SERIAL_USB 0x0c03 -#define PCI_CLASS_SERIAL_SMBUS 0x0c05 - 
+#define PCI_BASE_CLASS_BRIDGE 0x06 #define PCI_CLASS_BRIDGE_HOST 0x0600 #define PCI_CLASS_BRIDGE_ISA 0x0601 +#define PCI_CLASS_BRIDGE_EISA 0x0602 +#define PCI_CLASS_BRIDGE_MC 0x0603 #define PCI_CLASS_BRIDGE_PCI 0x0604 #define PCI_CLASS_BRIDGE_PCI_INF_SUB 0x01 +#define PCI_CLASS_BRIDGE_PCMCIA 0x0605 +#define PCI_CLASS_BRIDGE_NUBUS 0x0606 +#define PCI_CLASS_BRIDGE_CARDBUS 0x0607 +#define PCI_CLASS_BRIDGE_RACEWAY 0x0608 +#define PCI_CLASS_BRIDGE_PCI_SEMITP 0x0609 +#define PCI_CLASS_BRIDGE_IB_PCI 0x060a #define PCI_CLASS_BRIDGE_OTHER 0x0680 +#define PCI_BASE_CLASS_COMMUNICATION 0x07 #define PCI_CLASS_COMMUNICATION_SERIAL 0x0700 +#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701 +#define PCI_CLASS_COMMUNICATION_MULTISERIAL 0x0702 +#define PCI_CLASS_COMMUNICATION_MODEM 0x0703 +#define PCI_CLASS_COMMUNICATION_GPIB 0x0704 +#define PCI_CLASS_COMMUNICATION_SC 0x0705 #define PCI_CLASS_COMMUNICATION_OTHER 0x0780 +#define PCI_BASE_CLASS_SYSTEM 0x08 +#define PCI_CLASS_SYSTEM_PIC 0x0800 +#define PCI_CLASS_SYSTEM_PIC_IOAPIC 0x080010 +#define PCI_CLASS_SYSTEM_PIC_IOXAPIC 0x080020 +#define PCI_CLASS_SYSTEM_DMA 0x0801 +#define PCI_CLASS_SYSTEM_TIMER 0x0802 +#define PCI_CLASS_SYSTEM_RTC 0x0803 +#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804 +#define PCI_CLASS_SYSTEM_SDHCI 0x0805 +#define PCI_CLASS_SYSTEM_OTHER 0x0880 + +#define PCI_BASE_CLASS_INPUT 0x09 #define PCI_CLASS_INPUT_KEYBOARD 0x0900 #define PCI_CLASS_INPUT_PEN 0x0901 #define PCI_CLASS_INPUT_MOUSE 0x0902 @@ -55,8 +98,59 @@ #define PCI_CLASS_INPUT_GAMEPORT 0x0904 #define PCI_CLASS_INPUT_OTHER 0x0980 -#define PCI_CLASS_PROCESSOR_CO 0x0b40 +#define PCI_BASE_CLASS_DOCKING 0x0a +#define PCI_CLASS_DOCKING_GENERIC 0x0a00 +#define PCI_CLASS_DOCKING_OTHER 0x0a80 + +#define PCI_BASE_CLASS_PROCESSOR 0x0b +#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02 #define PCI_CLASS_PROCESSOR_POWERPC 0x0b20 +#define PCI_CLASS_PROCESSOR_MIPS 0x0b30 +#define PCI_CLASS_PROCESSOR_CO 0x0b40 + +#define PCI_BASE_CLASS_SERIAL 0x0c +#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00 +#define PCI_CLASS_SERIAL_ACCESS 0x0c01 +#define PCI_CLASS_SERIAL_SSA 0x0c02 +#define PCI_CLASS_SERIAL_USB 0x0c03 +#define PCI_CLASS_SERIAL_USB_UHCI 0x0c0300 +#define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 +#define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 +#define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_UNKNOWN 0x0c0380 +#define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe +#define PCI_CLASS_SERIAL_FIBER 0x0c04 +#define PCI_CLASS_SERIAL_SMBUS 0x0c05 +#define PCI_CLASS_SERIAL_IB 0x0c06 +#define PCI_CLASS_SERIAL_IPMI 0x0c07 +#define PCI_CLASS_SERIAL_SERCOS 0x0c08 +#define PCI_CLASS_SERIAL_CANBUS 0x0c09 + +#define PCI_BASE_CLASS_WIRELESS 0x0d +#define PCI_CLASS_WIRELESS_IRDA 0x0d00 +#define PCI_CLASS_WIRELESS_CIR 0x0d01 +#define PCI_CLASS_WIRELESS_RF_CONTROLLER 0x0d10 +#define PCI_CLASS_WIRELESS_BLUETOOTH 0x0d11 +#define PCI_CLASS_WIRELESS_BROADBAND 0x0d12 +#define PCI_CLASS_WIRELESS_OTHER 0x0d80 + +#define PCI_BASE_CLASS_SATELLITE 0x0f +#define PCI_CLASS_SATELLITE_TV 0x0f00 +#define PCI_CLASS_SATELLITE_AUDIO 0x0f01 +#define PCI_CLASS_SATELLITE_VOICE 0x0f03 +#define PCI_CLASS_SATELLITE_DATA 0x0f04 + +#define PCI_BASE_CLASS_CRYPT 0x10 +#define PCI_CLASS_CRYPT_NETWORK 0x1000 +#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1001 +#define PCI_CLASS_CRYPT_OTHER 0x1080 + +#define PCI_BASE_CLASS_SIGNAL_PROCESSING 0x11 +#define PCI_CLASS_SP_DPIO 0x1100 +#define PCI_CLASS_SP_PERF 0x1101 +#define PCI_CLASS_SP_SYNCH 0x1110 +#define PCI_CLASS_SP_MANAGEMENT 0x1120 +#define PCI_CLASS_SP_OTHER 0x1180 #define PCI_CLASS_OTHERS 0xff diff --git 
a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index a2d8964f7e..cfd271129d 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -58,7 +58,7 @@ struct sPAPRMachineState { struct VIOsPAPRBus *vio_bus; QLIST_HEAD(, sPAPRPHBState) phbs; struct sPAPRNVRAM *nvram; - XICSState *xics; + ICSState *ics; DeviceState *rtc; void *htab; @@ -94,7 +94,9 @@ struct sPAPRMachineState { /*< public >*/ char *kvm_type; MemoryHotplugState hotplug_memory; - Object **cores; + + uint32_t nr_servers; + ICPState *icps; }; #define H_SUCCESS 0 diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index 50292f48b1..3c35665221 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -34,12 +34,6 @@ typedef struct sPAPRCPUCoreClass { ObjectClass *cpu_class; } sPAPRCPUCoreClass; -void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); char *spapr_get_cpu_core_type(const char *model); -void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); -void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); void spapr_cpu_core_class_init(ObjectClass *oc, void *data); #endif diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index fc6f673ea0..2e9685a5d9 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -87,7 +87,7 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); - return xics_get_qirq(spapr->xics, dev->irq); + return xics_get_qirq(XICS_FABRIC(spapr), dev->irq); } static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr, diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 3f0c31610a..1945913bf1 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -30,29 +30,6 @@ #include "hw/sysbus.h" -#define TYPE_XICS_COMMON "xics-common" -#define XICS_COMMON(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_COMMON) - -/* - * Retain xics as the type name to be compatible for migration. Rest all the - * functions, class and variables are renamed as xics_spapr. 
- */ -#define TYPE_XICS_SPAPR "xics" -#define XICS_SPAPR(obj) OBJECT_CHECK(XICSState, (obj), TYPE_XICS_SPAPR) - -#define TYPE_XICS_SPAPR_KVM "xics-spapr-kvm" -#define XICS_SPAPR_KVM(obj) \ - OBJECT_CHECK(KVMXICSState, (obj), TYPE_XICS_SPAPR_KVM) - -#define XICS_COMMON_CLASS(klass) \ - OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_COMMON) -#define XICS_SPAPR_CLASS(klass) \ - OBJECT_CLASS_CHECK(XICSStateClass, (klass), TYPE_XICS_SPAPR) -#define XICS_COMMON_GET_CLASS(obj) \ - OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_COMMON) -#define XICS_SPAPR_GET_CLASS(obj) \ - OBJECT_GET_CLASS(XICSStateClass, (obj), TYPE_XICS_SPAPR) - #define XICS_IPI 0x2 #define XICS_BUID 0x1 #define XICS_IRQ_BASE (XICS_BUID << 12) @@ -62,31 +39,12 @@ * (the kernel implementation supports more but we don't exploit * that yet) */ -typedef struct XICSStateClass XICSStateClass; -typedef struct XICSState XICSState; typedef struct ICPStateClass ICPStateClass; typedef struct ICPState ICPState; typedef struct ICSStateClass ICSStateClass; typedef struct ICSState ICSState; typedef struct ICSIRQState ICSIRQState; - -struct XICSStateClass { - DeviceClass parent_class; - - void (*cpu_setup)(XICSState *icp, PowerPCCPU *cpu); - void (*set_nr_irqs)(XICSState *icp, uint32_t nr_irqs, Error **errp); - void (*set_nr_servers)(XICSState *icp, uint32_t nr_servers, Error **errp); -}; - -struct XICSState { - /*< private >*/ - SysBusDevice parent_obj; - /*< public >*/ - uint32_t nr_servers; - uint32_t nr_irqs; - ICPState *ss; - QLIST_HEAD(, ICSState) ics; -}; +typedef struct XICSFabric XICSFabric; #define TYPE_ICP "icp" #define ICP(obj) OBJECT_CHECK(ICPState, (obj), TYPE_ICP) @@ -104,6 +62,7 @@ struct ICPStateClass { void (*pre_save)(ICPState *s); int (*post_load)(ICPState *s, int version_id); + void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu); }; struct ICPState { @@ -118,7 +77,7 @@ struct ICPState { qemu_irq output; bool cap_irq_xics_enabled; - XICSState *xics; + XICSFabric *xics; }; #define TYPE_ICS_BASE "ics-base" @@ -139,6 +98,7 @@ struct ICPState { struct ICSStateClass { DeviceClass parent_class; + void (*realize)(DeviceState *dev, Error **errp); void (*pre_save)(ICSState *s); int (*post_load)(ICSState *s, int version_id); void (*reject)(ICSState *s, uint32_t irq); @@ -154,8 +114,7 @@ struct ICSState { uint32_t offset; qemu_irq *qirqs; ICSIRQState *irqs; - XICSState *xics; - QLIST_ENTRY(ICSState) list; + XICSFabric *xics; }; static inline bool ics_valid_irq(ICSState *ics, uint32_t nr) @@ -180,19 +139,37 @@ struct ICSIRQState { uint8_t flags; }; +typedef struct XICSFabric { + Object parent; +} XICSFabric; + +#define TYPE_XICS_FABRIC "xics-fabric" +#define XICS_FABRIC(obj) \ + OBJECT_CHECK(XICSFabric, (obj), TYPE_XICS_FABRIC) +#define XICS_FABRIC_CLASS(klass) \ + OBJECT_CLASS_CHECK(XICSFabricClass, (klass), TYPE_XICS_FABRIC) +#define XICS_FABRIC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(XICSFabricClass, (obj), TYPE_XICS_FABRIC) + +typedef struct XICSFabricClass { + InterfaceClass parent; + ICSState *(*ics_get)(XICSFabric *xi, int irq); + void (*ics_resend)(XICSFabric *xi); + ICPState *(*icp_get)(XICSFabric *xi, int server); +} XICSFabricClass; + #define XICS_IRQS_SPAPR 1024 -qemu_irq xics_get_qirq(XICSState *icp, int irq); -int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp); -int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align, +int spapr_ics_alloc(ICSState *ics, int irq_hint, bool lsi, Error **errp); +int spapr_ics_alloc_block(ICSState *ics, int num, bool lsi, bool align, Error **errp); 
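To illustrate the new XICSFabric QOM interface above: the machine that owns the interrupt controller implements the three lookup/resend hooks, and consumers reach the ICS/ICP through them instead of through the removed XICSState. A simplified sketch modelled on the sPAPR machine, using the ics, nr_servers and icps fields added to sPAPRMachineState earlier in this diff:

```c
#include "qemu/osdep.h"
#include "hw/ppc/xics.h"
#include "hw/ppc/spapr.h"

static ICSState *example_ics_get(XICSFabric *xi, int irq)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);

    return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
}

static void example_ics_resend(XICSFabric *xi)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);

    ics_resend(spapr->ics);
}

static ICPState *example_icp_get(XICSFabric *xi, int server)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(xi);

    if (server < 0 || (uint32_t)server >= spapr->nr_servers) {
        return NULL;
    }
    return &spapr->icps[server];
}

static void example_machine_class_init(ObjectClass *oc, void *data)
{
    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);

    xic->ics_get = example_ics_get;
    xic->ics_resend = example_ics_resend;
    xic->icp_get = example_icp_get;
}
```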
-void xics_spapr_free(XICSState *icp, int irq, int num); -void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle); +void spapr_ics_free(ICSState *ics, int irq, int num); +void spapr_dt_xics(int nr_servers, void *fdt, uint32_t phandle); -void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); -void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu); -void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, - const char *typename, Error **errp); +qemu_irq xics_get_qirq(XICSFabric *xi, int irq); +ICPState *xics_icp_get(XICSFabric *xi, int server); +void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu); +void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu); /* Internal XICS interfaces */ int xics_get_cpu_index_by_dt_id(int cpu_dt_id); @@ -207,7 +184,15 @@ void ics_simple_write_xive(ICSState *ics, int nr, int server, uint8_t priority, uint8_t saved_priority); void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); +void icp_pic_print_info(ICPState *icp, Monitor *mon); +void ics_pic_print_info(ICSState *ics, Monitor *mon); + +void ics_resend(ICSState *ics); +void icp_resend(ICPState *ss); + +typedef struct sPAPRMachineState sPAPRMachineState; -ICSState *xics_find_source(XICSState *icp, int irq); +int xics_kvm_init(sPAPRMachineState *spapr, Error **errp); +int xics_spapr_init(sPAPRMachineState *spapr, Error **errp); #endif /* XICS_H */ diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h index 48cccbdb51..eafc3f0a86 100644 --- a/include/hw/ptimer.h +++ b/include/hw/ptimer.h @@ -60,6 +60,7 @@ typedef struct ptimer_state ptimer_state; typedef void (*ptimer_cb)(void *opaque); ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask); +void ptimer_free(ptimer_state *s); void ptimer_set_period(ptimer_state *s, int64_t period); void ptimer_set_freq(ptimer_state *s, uint32_t freq); uint64_t ptimer_get_limit(ptimer_state *s); diff --git a/include/hw/s390x/s390_flic.h b/include/hw/s390x/s390_flic.h index 9094edadf5..f9e6890c90 100644 --- a/include/hw/s390x/s390_flic.h +++ b/include/hw/s390x/s390_flic.h @@ -17,8 +17,13 @@ #include "hw/s390x/adapter.h" #include "hw/virtio/virtio.h" -#define ADAPTER_ROUTES_MAX_GSI 64 -#define VIRTIO_CCW_QUEUE_MAX ADAPTER_ROUTES_MAX_GSI +/* + * Reserve enough gsis to accommodate all virtio devices. + * If any other user of adapter routes needs more of these, + * we need to bump the value; but virtio looks like the + * maximum right now. + */ +#define ADAPTER_ROUTES_MAX_GSI VIRTIO_QUEUE_MAX typedef struct AdapterRoutes { AdapterInfo adapter; @@ -32,6 +37,8 @@ typedef struct AdapterRoutes { typedef struct S390FLICState { SysBusDevice parent_obj; + /* to limit AdapterRoutes.num_routes for compat */ + uint32_t adapter_routes_max_batch; } S390FLICState; diff --git a/include/hw/sd/bcm2835_sdhost.h b/include/hw/sd/bcm2835_sdhost.h new file mode 100644 index 0000000000..7520dd6507 --- /dev/null +++ b/include/hw/sd/bcm2835_sdhost.h @@ -0,0 +1,48 @@ +/* + * Raspberry Pi (BCM2835) SD Host Controller + * + * Copyright (c) 2017 Antfield SAS + * + * Authors: + * Clement Deschamps <clement.deschamps@antfield.fr> + * Luc Michel <luc.michel@antfield.fr> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef BCM2835_SDHOST_H +#define BCM2835_SDHOST_H + +#include "hw/sysbus.h" +#include "hw/sd/sd.h" + +#define TYPE_BCM2835_SDHOST "bcm2835-sdhost" +#define BCM2835_SDHOST(obj) \ + OBJECT_CHECK(BCM2835SDHostState, (obj), TYPE_BCM2835_SDHOST) + +#define BCM2835_SDHOST_FIFO_LEN 16 + +typedef struct { + SysBusDevice busdev; + SDBus sdbus; + MemoryRegion iomem; + + uint32_t cmd; + uint32_t cmdarg; + uint32_t status; + uint32_t rsp[4]; + uint32_t config; + uint32_t edm; + uint32_t vdd; + uint32_t hbct; + uint32_t hblc; + int32_t fifo_pos; + int32_t fifo_len; + uint32_t fifo[BCM2835_SDHOST_FIFO_LEN]; + uint32_t datacnt; + + qemu_irq irq; +} BCM2835SDHostState; + +#endif diff --git a/include/hw/sd/sd.h b/include/hw/sd/sd.h index 79909b2478..96caefe373 100644 --- a/include/hw/sd/sd.h +++ b/include/hw/sd/sd.h @@ -140,6 +140,17 @@ uint8_t sdbus_read_data(SDBus *sd); bool sdbus_data_ready(SDBus *sd); bool sdbus_get_inserted(SDBus *sd); bool sdbus_get_readonly(SDBus *sd); +/** + * sdbus_reparent_card: Reparent an SD card from one controller to another + * @from: controller bus to remove card from + * @to: controller bus to move card to + * + * Reparent an SD card, effectively unplugging it from one controller + * and inserting it into another. This is useful for SoCs like the + * bcm2835 which have two SD controllers and connect a single SD card + * to them, selected by the guest reprogramming GPIO line routing. + */ +void sdbus_reparent_card(SDBus *from, SDBus *to); /* Functions to be used by SD devices to report back to qdevified controllers */ void sdbus_set_inserted(SDBus *sd, bool inserted); diff --git a/include/hw/timer/armv7m_systick.h b/include/hw/timer/armv7m_systick.h new file mode 100644 index 0000000000..cca04defd8 --- /dev/null +++ b/include/hw/timer/armv7m_systick.h @@ -0,0 +1,34 @@ +/* + * ARMv7M SysTick timer + * + * Copyright (c) 2006-2007 CodeSourcery. + * Written by Paul Brook + * Copyright (c) 2017 Linaro Ltd + * Written by Peter Maydell + * + * This code is licensed under the GPL (version 2 or later). + */ + +#ifndef HW_TIMER_ARMV7M_SYSTICK_H +#define HW_TIMER_ARMV7M_SYSTICK_H + +#include "hw/sysbus.h" + +#define TYPE_SYSTICK "armv7m_systick" + +#define SYSTICK(obj) OBJECT_CHECK(SysTickState, (obj), TYPE_SYSTICK) + +typedef struct SysTickState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t control; + uint32_t reload; + int64_t tick; + QEMUTimer *timer; + MemoryRegion iomem; + qemu_irq irq; +} SysTickState; + +#endif diff --git a/include/hw/timer/mips_gictimer.h b/include/hw/timer/mips_gictimer.h index c8bc5d2541..c7ca6c821d 100644 --- a/include/hw/timer/mips_gictimer.h +++ b/include/hw/timer/mips_gictimer.h @@ -31,6 +31,7 @@ struct MIPSGICTimerState { MIPSGICTimerCB *cb; }; +uint32_t mips_gictimer_get_freq(MIPSGICTimerState *gic); uint32_t mips_gictimer_get_sh_count(MIPSGICTimerState *gic); void mips_gictimer_store_sh_count(MIPSGICTimerState *gic, uint64_t count); uint32_t mips_gictimer_get_vp_compare(MIPSGICTimerState *gictimer, diff --git a/include/hw/usb.h b/include/hw/usb.h index c42b29c866..eb28655270 100644 --- a/include/hw/usb.h +++ b/include/hw/usb.h @@ -291,11 +291,6 @@ typedef struct USBDeviceClass { void (*cancel_packet)(USBDevice *dev, USBPacket *p); /* - * Called when device is destroyed. 
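The sdbus_reparent_card() helper documented above exists precisely for the BCM2835 case added earlier in this diff, where a single SD card is switched between the SDHCI and SDHost controllers under guest control. A hedged sketch of the routing decision; the trigger condition is simplified, the real logic lives in the GPIO model's register handling:

```c
#include "qemu/osdep.h"
#include "hw/sd/sd.h"
#include "hw/gpio/bcm2835_gpio.h"

static void example_route_sdcard(BCM2835GpioState *s, bool to_sdhci)
{
    SDBus *from = to_sdhci ? s->sdbus_sdhost : s->sdbus_sdhci;
    SDBus *to   = to_sdhci ? s->sdbus_sdhci : s->sdbus_sdhost;

    /* Effectively unplug the card from one controller and insert it into
     * the other, as described in the sdbus_reparent_card() comment. */
    sdbus_reparent_card(from, to);
}
```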
- */ - void (*handle_destroy)(USBDevice *dev); - - /* * Attach the device */ void (*handle_attach)(USBDevice *dev); diff --git a/include/migration/migration.h b/include/migration/migration.h index 1735d66512..5720c884f4 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -22,6 +22,7 @@ #include "qapi-types.h" #include "exec/cpu-common.h" #include "qemu/coroutine_int.h" +#include "qom/object.h" #define QEMU_VM_FILE_MAGIC 0x5145564d #define QEMU_VM_FILE_VERSION_COMPAT 0x00000002 @@ -92,6 +93,7 @@ struct MigrationIncomingState { */ QemuEvent main_thread_load_event; + size_t largest_page_size; bool have_fault_thread; QemuThread fault_thread; QemuSemaphore fault_thread_sem; @@ -107,6 +109,7 @@ struct MigrationIncomingState { QEMUFile *to_src_file; QemuMutex rp_mutex; /* We send replies from multiple threads */ void *postcopy_tmp_page; + void *postcopy_tmp_zero_page; QEMUBH *bh; @@ -313,6 +316,8 @@ int migrate_add_blocker(Error *reason, Error **errp); */ void migrate_del_blocker(Error *reason); +int check_migratable(Object *obj, Error **err); + bool migrate_release_ram(void); bool migrate_postcopy_ram(void); bool migrate_zero_blocks(void); @@ -375,6 +380,7 @@ void global_state_store_running(void); void flush_page_queue(MigrationState *ms); int ram_save_queue_pages(MigrationState *ms, const char *rbname, ram_addr_t start, ram_addr_t len); +uint64_t ram_pagesize_summary(void); PostcopyState postcopy_state_get(void); /* Set the state and return the old state */ diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index b6a7491f2d..8e036b95a2 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -35,13 +35,6 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); /* - * Discard the contents of 'length' bytes from 'start' - * We can assume that if we've been called postcopy_ram_hosttest returned true - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length); - -/* * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard * however leaving it until after precopy means that most of the precopy * data is still THPd @@ -81,13 +74,15 @@ void postcopy_discard_send_finish(MigrationState *ms, * to use other postcopy_ routines to allocate. 
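Because RAM blocks may now use differing (huge)page sizes, the incoming postcopy side tracks the largest one and sizes its temporary pages accordingly. A rough sketch of the idea, combining the largest_page_size field added above with qemu_ram_pagesize_largest() from the cpu-common.h hunk; this illustrates the intent rather than reproducing the migration code:

```c
#include "qemu/osdep.h"
#include "exec/cpu-common.h"
#include "migration/migration.h"

static int example_postcopy_prepare(MigrationIncomingState *mis)
{
    mis->largest_page_size = qemu_ram_pagesize_largest();

    /* The temporary page used to receive incoming data must be able to
     * hold one page of the largest-page-size RAM block. */
    mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
                                  PROT_READ | PROT_WRITE,
                                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mis->postcopy_tmp_page == MAP_FAILED) {
        mis->postcopy_tmp_page = NULL;
        return -errno;
    }
    return 0;
}
```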
* returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from); +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize); /* * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host); +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize); /* * Allocate a page of memory that can be mapped at a later point in time diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 63e7b02e05..f2dbf8410a 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -253,6 +253,10 @@ extern const VMStateInfo vmstate_info_uint16; extern const VMStateInfo vmstate_info_uint32; extern const VMStateInfo vmstate_info_uint64; +/** Put this in the stream when migrating a null pointer.*/ +#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ +extern const VMStateInfo vmstate_info_nullptr; + extern const VMStateInfo vmstate_info_float64; extern const VMStateInfo vmstate_info_cpudouble; diff --git a/include/qemu-common.h b/include/qemu-common.h index 1430390eb6..d218821c14 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -19,7 +19,7 @@ #include "qemu/option.h" /* Copyright string for -version arguments, About dialogs, etc */ -#define QEMU_COPYRIGHT "Copyright (c) 2003-2016 " \ +#define QEMU_COPYRIGHT "Copyright (c) 2003-2017 " \ "Fabrice Bellard and the QEMU Project developers" /* main function, renamed */ diff --git a/include/qemu-io.h b/include/qemu-io.h index 4d402b9b01..196fde0f3a 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -36,6 +36,7 @@ typedef struct cmdinfo { const char *args; const char *oneline; helpfunc_t help; + uint64_t perm; } cmdinfo_t; extern bool qemuio_misalign; diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index 8033929139..f0878eaafa 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -130,34 +130,19 @@ int qemu_strtol(const char *nptr, const char **endptr, int base, long *result); int qemu_strtoul(const char *nptr, const char **endptr, int base, unsigned long *result); -int qemu_strtoll(const char *nptr, const char **endptr, int base, - int64_t *result); -int qemu_strtoull(const char *nptr, const char **endptr, int base, +int qemu_strtoi64(const char *nptr, const char **endptr, int base, + int64_t *result); +int qemu_strtou64(const char *nptr, const char **endptr, int base, uint64_t *result); int parse_uint(const char *s, unsigned long long *value, char **endptr, int base); int parse_uint_full(const char *s, unsigned long long *value, int base); -/* - * qemu_strtosz() suffixes used to specify the default treatment of an - * argument passed to qemu_strtosz() without an explicit suffix. - * These should be defined using upper case characters in the range - * A-Z, as qemu_strtosz() will use qemu_toupper() on the given argument - * prior to comparison. 
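A converted call site for the qemu_strtoll/qemu_strtoull rename above; behaviour (0 on success, negative errno on failure) is unchanged, the names simply now match the fixed-width result types:

```c
#include "qemu/osdep.h"
#include "qemu/cutils.h"

static int example_parse_u64(const char *arg, uint64_t *value)
{
    /* Previously: qemu_strtoull(arg, NULL, 0, value); */
    return qemu_strtou64(arg, NULL, 0, value);
}
```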
- */ -#define QEMU_STRTOSZ_DEFSUFFIX_EB 'E' -#define QEMU_STRTOSZ_DEFSUFFIX_PB 'P' -#define QEMU_STRTOSZ_DEFSUFFIX_TB 'T' -#define QEMU_STRTOSZ_DEFSUFFIX_GB 'G' -#define QEMU_STRTOSZ_DEFSUFFIX_MB 'M' -#define QEMU_STRTOSZ_DEFSUFFIX_KB 'K' -#define QEMU_STRTOSZ_DEFSUFFIX_B 'B' -int64_t qemu_strtosz(const char *nptr, char **end); -int64_t qemu_strtosz_suffix(const char *nptr, char **end, - const char default_suffix); -int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, - const char default_suffix, int64_t unit); +int qemu_strtosz(const char *nptr, char **end, uint64_t *result); +int qemu_strtosz_MiB(const char *nptr, char **end, uint64_t *result); +int qemu_strtosz_metric(const char *nptr, char **end, uint64_t *result); + #define K_BYTE (1ULL << 10) #define M_BYTE (1ULL << 20) #define G_BYTE (1ULL << 30) diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h new file mode 100644 index 0000000000..3133d1ca40 --- /dev/null +++ b/include/qemu/throttle-options.h @@ -0,0 +1,92 @@ +/* + * QEMU throttling command line options + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. + * + * See the COPYING file in the top-level directory for details. + * + */ +#ifndef THROTTLE_OPTIONS_H +#define THROTTLE_OPTIONS_H + +#define THROTTLE_OPTS \ + { \ + .name = "throttling.iops-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total I/O operations per second",\ + },{ \ + .name = "throttling.iops-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read operations per second",\ + },{ \ + .name = "throttling.iops-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write operations per second",\ + },{ \ + .name = "throttling.bps-total",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit total bytes per second",\ + },{ \ + .name = "throttling.bps-read",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit read bytes per second",\ + },{ \ + .name = "throttling.bps-write",\ + .type = QEMU_OPT_NUMBER,\ + .help = "limit write bytes per second",\ + },{ \ + .name = "throttling.iops-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations burst",\ + },{ \ + .name = "throttling.iops-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations read burst",\ + },{ \ + .name = "throttling.iops-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "I/O operations write burst",\ + },{ \ + .name = "throttling.bps-total-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes burst",\ + },{ \ + .name = "throttling.bps-read-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes read burst",\ + },{ \ + .name = "throttling.bps-write-max",\ + .type = QEMU_OPT_NUMBER,\ + .help = "total bytes write burst",\ + },{ \ + .name = "throttling.iops-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the iops-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-total-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-total-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-read-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help = "length of the bps-read-max burst period, in seconds",\ + },{ \ + .name = "throttling.bps-write-max-length",\ + .type = QEMU_OPT_NUMBER,\ + .help 
= "length of the bps-write-max burst period, in seconds",\ + },{ \ + .name = "throttling.iops-size",\ + .type = QEMU_OPT_NUMBER,\ + .help = "when limiting by iops max size of an I/O in bytes",\ + } + +#endif diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 9abed51ae8..26e628584c 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -610,7 +610,10 @@ void timer_deinit(QEMUTimer *ts); * * Free a timer (it must not be on the active list) */ -void timer_free(QEMUTimer *ts); +static inline void timer_free(QEMUTimer *ts) +{ + g_free(ts); +} /** * timer_del: diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 1bc3ad230a..c3292efe1c 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -329,6 +329,7 @@ struct CPUState { bool unplug; bool crash_occurred; bool exit_request; + /* updates protected by BQL */ uint32_t interrupt_request; int singlestep_enabled; int64_t icount_extra; @@ -400,6 +401,12 @@ struct CPUState { bool hax_vcpu_dirty; struct hax_vcpu_state *hax_vcpu; + + /* The pending_tlb_flush flag is set and cleared atomically to + * avoid potential races. The aim of the flag is to avoid + * unnecessary flushes. + */ + uint16_t pending_tlb_flush; }; QTAILQ_HEAD(CPUTailQ, CPUState); @@ -415,6 +422,15 @@ extern struct CPUTailQ cpus; extern __thread CPUState *current_cpu; /** + * qemu_tcg_mttcg_enabled: + * Check whether we are running MultiThread TCG or not. + * + * Returns: %true if we are in MTTCG mode %false otherwise. + */ +extern bool mttcg_enabled; +#define qemu_tcg_mttcg_enabled() (mttcg_enabled) + +/** * cpu_paging_enabled: * @cpu: The CPU whose state is to be inspected. * diff --git a/include/standard-headers/asm-x86/hyperv.h b/include/standard-headers/asm-x86/hyperv.h index 47b38fb816..eca9a2ca22 100644 --- a/include/standard-headers/asm-x86/hyperv.h +++ b/include/standard-headers/asm-x86/hyperv.h @@ -73,6 +73,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -144,6 +147,11 @@ */ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +/* + * Crash notification flag. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 5c10f7e25d..c8b3338375 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -640,7 +640,7 @@ * Control a data application associated with the currently viewed channel, * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) */ -#define KEY_DATA 0x275 +#define KEY_DATA 0x277 #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index e5a2e68b22..634c9c44ed 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -23,6 +23,14 @@ #define LINUX_PCI_REGS_H /* + * Conventional PCI and PCI-X Mode 1 devices have 256 bytes of + * configuration space. PCI-X Mode 2 and PCIe devices have 4096 bytes of + * configuration space. 
+ */ +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 + +/* * Under PCI, each device has 256 bytes of configuration address space, * of which the first 64 bytes are standardized as follows: */ @@ -674,6 +682,7 @@ #define PCI_EXT_CAP_ID_PMUX 0x1A /* Protocol Multiplexing */ #define PCI_EXT_CAP_ID_PASID 0x1B /* Process Address Space ID */ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ +#define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PTM @@ -965,6 +974,7 @@ #define PCI_EXP_DPC_STATUS 8 /* DPC Status */ #define PCI_EXP_DPC_STATUS_TRIGGER 0x01 /* Trigger Status */ #define PCI_EXP_DPC_STATUS_INTERRUPT 0x08 /* Interrupt Status */ +#define PCI_EXP_DPC_RP_BUSY 0x10 /* Root Port Busy */ #define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ @@ -977,4 +987,19 @@ #define PCI_PTM_CTRL_ENABLE 0x00000001 /* PTM enable */ #define PCI_PTM_CTRL_ROOT 0x00000002 /* Root select */ +/* L1 PM Substates */ +#define PCI_L1SS_CAP 4 /* capability register */ +#define PCI_L1SS_CAP_PCIPM_L1_2 1 /* PCI PM L1.2 Support */ +#define PCI_L1SS_CAP_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CAP_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CAP_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CAP_L1_PM_SS 16 /* L1 PM Substates Support */ +#define PCI_L1SS_CTL1 8 /* Control Register 1 */ +#define PCI_L1SS_CTL1_PCIPM_L1_2 1 /* PCI PM L1.2 Enable */ +#define PCI_L1SS_CTL1_PCIPM_L1_1 2 /* PCI PM L1.1 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_2 4 /* ASPM L1.2 Support */ +#define PCI_L1SS_CTL1_ASPM_L1_1 8 /* ASPM L1.1 Support */ +#define PCI_L1SS_CTL1_L1SS_MASK 0x0000000F +#define PCI_L1SS_CTL2 0xC /* Control Register 2 */ + #endif /* LINUX_PCI_REGS_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index fe74e422d4..6d5c3b2d4f 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -43,4 +43,5 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ + #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index f365a51acf..096c17fce0 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -34,7 +34,7 @@ typedef struct BlockDevOps { * changes. Sure would be useful if it did. * Device models with removable media must implement this callback. */ - void (*change_media_cb)(void *opaque, bool load); + void (*change_media_cb)(void *opaque, bool load, Error **errp); /* * Runs when an eject request is issued from the monitor, the tray * is closed, and the medium is locked. 
@@ -84,7 +84,7 @@ typedef struct BlockBackendPublic { QLIST_ENTRY(BlockBackendPublic) round_robin; } BlockBackendPublic; -BlockBackend *blk_new(void); +BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -102,9 +102,12 @@ BlockBackend *blk_by_public(BlockBackendPublic *public); BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); -void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); +int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp); bool bdrv_has_blk(BlockDriverState *bs); bool bdrv_is_root_node(BlockDriverState *bs); +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); +void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 3728a1ea7e..a73b5d4bce 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -36,4 +36,6 @@ extern int smp_threads; void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); +void qemu_tcg_configure(QemuOpts *opts, Error **errp); + #endif diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 7aad20b07f..f1c0712795 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -152,6 +152,13 @@ void replay_unregister_net(ReplayNetState *rns); void replay_net_packet_event(ReplayNetState *rns, unsigned flags, const struct iovec *iov, int iovcnt); +/* Audio */ + +/*! Saves/restores number of played samples of audio out operation. */ +void replay_audio_out(int *played); +/*! Saves/restores recorded samples of audio in operation. */ +void replay_audio_in(int *recorded, void *samples, int *wpos, int size); + /* VM state operations */ /*! Called at the start of execution. 
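(Editorial note, not part of the patch: the block-backend.h hunk above changes blk_new() to take explicit permission masks and makes blk_insert_bs()/blk_set_perm() fallible with an Error. A minimal sketch of how a caller might adopt the new interface follows; the BLK_PERM_* flags, blk_unref() and error_propagate() are assumptions drawn from the wider QEMU block layer, not from this diff.)

    /* Sketch only: BLK_PERM_* and blk_unref() are assumed to come from the
     * QEMU block layer headers; they are not defined by this patch. */
    #include "qemu/osdep.h"
    #include "block/block.h"
    #include "sysemu/block-backend.h"
    #include "qapi/error.h"

    static BlockBackend *example_attach_root(BlockDriverState *bs, Error **errp)
    {
        Error *local_err = NULL;

        /* Request consistent reads and writes for ourselves, and allow other
         * users of the node everything except writing. */
        BlockBackend *blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
                                    BLK_PERM_ALL & ~BLK_PERM_WRITE);

        /* blk_insert_bs() now validates the requested permissions against the
         * node's other users and can fail instead of silently sharing. */
        if (blk_insert_bs(blk, bs, &local_err) < 0) {
            error_propagate(errp, local_err);
            blk_unref(blk);
            return NULL;
        }
        return blk;
    }

(The permission values above are illustrative; a real device model would pick the masks that match what it actually needs from the backing node.)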
diff --git a/include/ui/console.h b/include/ui/console.h index af6350e96f..ac2895ca93 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -215,10 +215,14 @@ typedef struct DisplayChangeListenerOps { QEMUGLContext ctx); QEMUGLContext (*dpy_gl_ctx_get_current)(DisplayChangeListener *dcl); - void (*dpy_gl_scanout)(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, uint32_t w, uint32_t h); + void (*dpy_gl_scanout_disable)(DisplayChangeListener *dcl); + void (*dpy_gl_scanout_texture)(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); void (*dpy_gl_update)(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); @@ -284,10 +288,11 @@ bool dpy_cursor_define_supported(QemuConsole *con); bool dpy_gfx_check_format(QemuConsole *con, pixman_format_code_t format); -void dpy_gl_scanout(QemuConsole *con, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, uint32_t w, uint32_t h); +void dpy_gl_scanout_disable(QemuConsole *con); +void dpy_gl_scanout_texture(QemuConsole *con, + uint32_t backing_id, bool backing_y_0_top, + uint32_t backing_width, uint32_t backing_height, + uint32_t x, uint32_t y, uint32_t w, uint32_t h); void dpy_gl_update(QemuConsole *con, uint32_t x, uint32_t y, uint32_t w, uint32_t h); diff --git a/include/ui/gtk.h b/include/ui/gtk.h index 47ffddb5b4..ca9a2268de 100644 --- a/include/ui/gtk.h +++ b/include/ui/gtk.h @@ -103,11 +103,14 @@ void gd_egl_switch(DisplayChangeListener *dcl, DisplaySurface *surface); QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl, QEMUGLParams *params); -void gd_egl_scanout(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h); +void gd_egl_scanout_disable(DisplayChangeListener *dcl); +void gd_egl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); void gd_egl_scanout_flush(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gtk_egl_init(void); @@ -126,11 +129,13 @@ QEMUGLContext gd_gl_area_create_context(DisplayChangeListener *dcl, QEMUGLParams *params); void gd_gl_area_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx); -void gd_gl_area_scanout(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h); +void gd_gl_area_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); void gd_gl_area_scanout_flush(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); void gtk_gl_area_init(void); diff --git a/include/ui/sdl2.h b/include/ui/sdl2.h index 683bb6af2e..aaf226c2c0 100644 --- a/include/ui/sdl2.h +++ b/include/ui/sdl2.h @@ -62,11 +62,14 @@ int sdl2_gl_make_context_current(DisplayChangeListener *dcl, QEMUGLContext ctx); QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl); -void sdl2_gl_scanout(DisplayChangeListener *dcl, - 
uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h); +void sdl2_gl_scanout_disable(DisplayChangeListener *dcl); +void sdl2_gl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h); void sdl2_gl_scanout_flush(DisplayChangeListener *dcl, uint32_t x, uint32_t y, uint32_t w, uint32_t h); diff --git a/iothread.c b/iothread.c index 257b01d5f1..beeb870534 100644 --- a/iothread.c +++ b/iothread.c @@ -268,6 +268,9 @@ static int query_one_iothread(Object *object, void *opaque) info = g_new0(IOThreadInfo, 1); info->id = iothread_get_id(iothread); info->thread_id = iothread->thread_id; + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; elem = g_new0(IOThreadInfoList, 1); elem->value = info; diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index 2fb7859465..1101d55d2f 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ @@ -179,10 +181,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* KVM_IRQ_LINE irq field index values */ diff --git a/linux-headers/asm-arm/unistd-common.h b/linux-headers/asm-arm/unistd-common.h new file mode 100644 index 0000000000..13a74afd02 --- /dev/null +++ b/linux-headers/asm-arm/unistd-common.h @@ -0,0 +1,357 @@ +#ifndef _ASM_ARM_UNISTD_COMMON_H +#define _ASM_ARM_UNISTD_COMMON_H 1 + +#define __NR_restart_syscall (__NR_SYSCALL_BASE + 0) +#define __NR_exit (__NR_SYSCALL_BASE + 1) +#define __NR_fork (__NR_SYSCALL_BASE + 2) +#define __NR_read (__NR_SYSCALL_BASE + 3) +#define __NR_write (__NR_SYSCALL_BASE + 4) +#define __NR_open (__NR_SYSCALL_BASE + 5) +#define __NR_close (__NR_SYSCALL_BASE + 6) +#define __NR_creat (__NR_SYSCALL_BASE + 8) +#define __NR_link (__NR_SYSCALL_BASE + 9) +#define __NR_unlink (__NR_SYSCALL_BASE + 10) +#define __NR_execve (__NR_SYSCALL_BASE + 11) +#define __NR_chdir (__NR_SYSCALL_BASE + 12) +#define __NR_mknod (__NR_SYSCALL_BASE + 14) 
+#define __NR_chmod (__NR_SYSCALL_BASE + 15) +#define __NR_lchown (__NR_SYSCALL_BASE + 16) +#define __NR_lseek (__NR_SYSCALL_BASE + 19) +#define __NR_getpid (__NR_SYSCALL_BASE + 20) +#define __NR_mount (__NR_SYSCALL_BASE + 21) +#define __NR_setuid (__NR_SYSCALL_BASE + 23) +#define __NR_getuid (__NR_SYSCALL_BASE + 24) +#define __NR_ptrace (__NR_SYSCALL_BASE + 26) +#define __NR_pause (__NR_SYSCALL_BASE + 29) +#define __NR_access (__NR_SYSCALL_BASE + 33) +#define __NR_nice (__NR_SYSCALL_BASE + 34) +#define __NR_sync (__NR_SYSCALL_BASE + 36) +#define __NR_kill (__NR_SYSCALL_BASE + 37) +#define __NR_rename (__NR_SYSCALL_BASE + 38) +#define __NR_mkdir (__NR_SYSCALL_BASE + 39) +#define __NR_rmdir (__NR_SYSCALL_BASE + 40) +#define __NR_dup (__NR_SYSCALL_BASE + 41) +#define __NR_pipe (__NR_SYSCALL_BASE + 42) +#define __NR_times (__NR_SYSCALL_BASE + 43) +#define __NR_brk (__NR_SYSCALL_BASE + 45) +#define __NR_setgid (__NR_SYSCALL_BASE + 46) +#define __NR_getgid (__NR_SYSCALL_BASE + 47) +#define __NR_geteuid (__NR_SYSCALL_BASE + 49) +#define __NR_getegid (__NR_SYSCALL_BASE + 50) +#define __NR_acct (__NR_SYSCALL_BASE + 51) +#define __NR_umount2 (__NR_SYSCALL_BASE + 52) +#define __NR_ioctl (__NR_SYSCALL_BASE + 54) +#define __NR_fcntl (__NR_SYSCALL_BASE + 55) +#define __NR_setpgid (__NR_SYSCALL_BASE + 57) +#define __NR_umask (__NR_SYSCALL_BASE + 60) +#define __NR_chroot (__NR_SYSCALL_BASE + 61) +#define __NR_ustat (__NR_SYSCALL_BASE + 62) +#define __NR_dup2 (__NR_SYSCALL_BASE + 63) +#define __NR_getppid (__NR_SYSCALL_BASE + 64) +#define __NR_getpgrp (__NR_SYSCALL_BASE + 65) +#define __NR_setsid (__NR_SYSCALL_BASE + 66) +#define __NR_sigaction (__NR_SYSCALL_BASE + 67) +#define __NR_setreuid (__NR_SYSCALL_BASE + 70) +#define __NR_setregid (__NR_SYSCALL_BASE + 71) +#define __NR_sigsuspend (__NR_SYSCALL_BASE + 72) +#define __NR_sigpending (__NR_SYSCALL_BASE + 73) +#define __NR_sethostname (__NR_SYSCALL_BASE + 74) +#define __NR_setrlimit (__NR_SYSCALL_BASE + 75) +#define __NR_getrusage (__NR_SYSCALL_BASE + 77) +#define __NR_gettimeofday (__NR_SYSCALL_BASE + 78) +#define __NR_settimeofday (__NR_SYSCALL_BASE + 79) +#define __NR_getgroups (__NR_SYSCALL_BASE + 80) +#define __NR_setgroups (__NR_SYSCALL_BASE + 81) +#define __NR_symlink (__NR_SYSCALL_BASE + 83) +#define __NR_readlink (__NR_SYSCALL_BASE + 85) +#define __NR_uselib (__NR_SYSCALL_BASE + 86) +#define __NR_swapon (__NR_SYSCALL_BASE + 87) +#define __NR_reboot (__NR_SYSCALL_BASE + 88) +#define __NR_munmap (__NR_SYSCALL_BASE + 91) +#define __NR_truncate (__NR_SYSCALL_BASE + 92) +#define __NR_ftruncate (__NR_SYSCALL_BASE + 93) +#define __NR_fchmod (__NR_SYSCALL_BASE + 94) +#define __NR_fchown (__NR_SYSCALL_BASE + 95) +#define __NR_getpriority (__NR_SYSCALL_BASE + 96) +#define __NR_setpriority (__NR_SYSCALL_BASE + 97) +#define __NR_statfs (__NR_SYSCALL_BASE + 99) +#define __NR_fstatfs (__NR_SYSCALL_BASE + 100) +#define __NR_syslog (__NR_SYSCALL_BASE + 103) +#define __NR_setitimer (__NR_SYSCALL_BASE + 104) +#define __NR_getitimer (__NR_SYSCALL_BASE + 105) +#define __NR_stat (__NR_SYSCALL_BASE + 106) +#define __NR_lstat (__NR_SYSCALL_BASE + 107) +#define __NR_fstat (__NR_SYSCALL_BASE + 108) +#define __NR_vhangup (__NR_SYSCALL_BASE + 111) +#define __NR_wait4 (__NR_SYSCALL_BASE + 114) +#define __NR_swapoff (__NR_SYSCALL_BASE + 115) +#define __NR_sysinfo (__NR_SYSCALL_BASE + 116) +#define __NR_fsync (__NR_SYSCALL_BASE + 118) +#define __NR_sigreturn (__NR_SYSCALL_BASE + 119) +#define __NR_clone (__NR_SYSCALL_BASE + 120) +#define __NR_setdomainname 
(__NR_SYSCALL_BASE + 121) +#define __NR_uname (__NR_SYSCALL_BASE + 122) +#define __NR_adjtimex (__NR_SYSCALL_BASE + 124) +#define __NR_mprotect (__NR_SYSCALL_BASE + 125) +#define __NR_sigprocmask (__NR_SYSCALL_BASE + 126) +#define __NR_init_module (__NR_SYSCALL_BASE + 128) +#define __NR_delete_module (__NR_SYSCALL_BASE + 129) +#define __NR_quotactl (__NR_SYSCALL_BASE + 131) +#define __NR_getpgid (__NR_SYSCALL_BASE + 132) +#define __NR_fchdir (__NR_SYSCALL_BASE + 133) +#define __NR_bdflush (__NR_SYSCALL_BASE + 134) +#define __NR_sysfs (__NR_SYSCALL_BASE + 135) +#define __NR_personality (__NR_SYSCALL_BASE + 136) +#define __NR_setfsuid (__NR_SYSCALL_BASE + 138) +#define __NR_setfsgid (__NR_SYSCALL_BASE + 139) +#define __NR__llseek (__NR_SYSCALL_BASE + 140) +#define __NR_getdents (__NR_SYSCALL_BASE + 141) +#define __NR__newselect (__NR_SYSCALL_BASE + 142) +#define __NR_flock (__NR_SYSCALL_BASE + 143) +#define __NR_msync (__NR_SYSCALL_BASE + 144) +#define __NR_readv (__NR_SYSCALL_BASE + 145) +#define __NR_writev (__NR_SYSCALL_BASE + 146) +#define __NR_getsid (__NR_SYSCALL_BASE + 147) +#define __NR_fdatasync (__NR_SYSCALL_BASE + 148) +#define __NR__sysctl (__NR_SYSCALL_BASE + 149) +#define __NR_mlock (__NR_SYSCALL_BASE + 150) +#define __NR_munlock (__NR_SYSCALL_BASE + 151) +#define __NR_mlockall (__NR_SYSCALL_BASE + 152) +#define __NR_munlockall (__NR_SYSCALL_BASE + 153) +#define __NR_sched_setparam (__NR_SYSCALL_BASE + 154) +#define __NR_sched_getparam (__NR_SYSCALL_BASE + 155) +#define __NR_sched_setscheduler (__NR_SYSCALL_BASE + 156) +#define __NR_sched_getscheduler (__NR_SYSCALL_BASE + 157) +#define __NR_sched_yield (__NR_SYSCALL_BASE + 158) +#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE + 159) +#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE + 160) +#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE + 161) +#define __NR_nanosleep (__NR_SYSCALL_BASE + 162) +#define __NR_mremap (__NR_SYSCALL_BASE + 163) +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_getresuid (__NR_SYSCALL_BASE + 165) +#define __NR_poll (__NR_SYSCALL_BASE + 168) +#define __NR_nfsservctl (__NR_SYSCALL_BASE + 169) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#define __NR_getresgid (__NR_SYSCALL_BASE + 171) +#define __NR_prctl (__NR_SYSCALL_BASE + 172) +#define __NR_rt_sigreturn (__NR_SYSCALL_BASE + 173) +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE + 177) +#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE + 178) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#define __NR_chown (__NR_SYSCALL_BASE + 182) +#define __NR_getcwd (__NR_SYSCALL_BASE + 183) +#define __NR_capget (__NR_SYSCALL_BASE + 184) +#define __NR_capset (__NR_SYSCALL_BASE + 185) +#define __NR_sigaltstack (__NR_SYSCALL_BASE + 186) +#define __NR_sendfile (__NR_SYSCALL_BASE + 187) +#define __NR_vfork (__NR_SYSCALL_BASE + 190) +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#define __NR_mmap2 (__NR_SYSCALL_BASE + 192) +#define __NR_truncate64 (__NR_SYSCALL_BASE + 193) +#define __NR_ftruncate64 (__NR_SYSCALL_BASE + 194) +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#define __NR_lstat64 (__NR_SYSCALL_BASE + 196) +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#define __NR_lchown32 (__NR_SYSCALL_BASE + 198) +#define __NR_getuid32 (__NR_SYSCALL_BASE + 199) 
+#define __NR_getgid32 (__NR_SYSCALL_BASE + 200) +#define __NR_geteuid32 (__NR_SYSCALL_BASE + 201) +#define __NR_getegid32 (__NR_SYSCALL_BASE + 202) +#define __NR_setreuid32 (__NR_SYSCALL_BASE + 203) +#define __NR_setregid32 (__NR_SYSCALL_BASE + 204) +#define __NR_getgroups32 (__NR_SYSCALL_BASE + 205) +#define __NR_setgroups32 (__NR_SYSCALL_BASE + 206) +#define __NR_fchown32 (__NR_SYSCALL_BASE + 207) +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_getresuid32 (__NR_SYSCALL_BASE + 209) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#define __NR_getresgid32 (__NR_SYSCALL_BASE + 211) +#define __NR_chown32 (__NR_SYSCALL_BASE + 212) +#define __NR_setuid32 (__NR_SYSCALL_BASE + 213) +#define __NR_setgid32 (__NR_SYSCALL_BASE + 214) +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#define __NR_pivot_root (__NR_SYSCALL_BASE + 218) +#define __NR_mincore (__NR_SYSCALL_BASE + 219) +#define __NR_madvise (__NR_SYSCALL_BASE + 220) +#define __NR_fcntl64 (__NR_SYSCALL_BASE + 221) +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#define __NR_fsetxattr (__NR_SYSCALL_BASE + 228) +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#define __NR_fgetxattr (__NR_SYSCALL_BASE + 231) +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#define __NR_flistxattr (__NR_SYSCALL_BASE + 234) +#define __NR_removexattr (__NR_SYSCALL_BASE + 235) +#define __NR_lremovexattr (__NR_SYSCALL_BASE + 236) +#define __NR_fremovexattr (__NR_SYSCALL_BASE + 237) +#define __NR_tkill (__NR_SYSCALL_BASE + 238) +#define __NR_sendfile64 (__NR_SYSCALL_BASE + 239) +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#define __NR_io_setup (__NR_SYSCALL_BASE + 243) +#define __NR_io_destroy (__NR_SYSCALL_BASE + 244) +#define __NR_io_getevents (__NR_SYSCALL_BASE + 245) +#define __NR_io_submit (__NR_SYSCALL_BASE + 246) +#define __NR_io_cancel (__NR_SYSCALL_BASE + 247) +#define __NR_exit_group (__NR_SYSCALL_BASE + 248) +#define __NR_lookup_dcookie (__NR_SYSCALL_BASE + 249) +#define __NR_epoll_create (__NR_SYSCALL_BASE + 250) +#define __NR_epoll_ctl (__NR_SYSCALL_BASE + 251) +#define __NR_epoll_wait (__NR_SYSCALL_BASE + 252) +#define __NR_remap_file_pages (__NR_SYSCALL_BASE + 253) +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#define __NR_timer_create (__NR_SYSCALL_BASE + 257) +#define __NR_timer_settime (__NR_SYSCALL_BASE + 258) +#define __NR_timer_gettime (__NR_SYSCALL_BASE + 259) +#define __NR_timer_getoverrun (__NR_SYSCALL_BASE + 260) +#define __NR_timer_delete (__NR_SYSCALL_BASE + 261) +#define __NR_clock_settime (__NR_SYSCALL_BASE + 262) +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#define __NR_clock_nanosleep (__NR_SYSCALL_BASE + 265) +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#define __NR_tgkill (__NR_SYSCALL_BASE + 268) +#define __NR_utimes (__NR_SYSCALL_BASE + 269) +#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE + 270) +#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE + 271) +#define __NR_pciconfig_read (__NR_SYSCALL_BASE + 272) +#define 
__NR_pciconfig_write (__NR_SYSCALL_BASE + 273) +#define __NR_mq_open (__NR_SYSCALL_BASE + 274) +#define __NR_mq_unlink (__NR_SYSCALL_BASE + 275) +#define __NR_mq_timedsend (__NR_SYSCALL_BASE + 276) +#define __NR_mq_timedreceive (__NR_SYSCALL_BASE + 277) +#define __NR_mq_notify (__NR_SYSCALL_BASE + 278) +#define __NR_mq_getsetattr (__NR_SYSCALL_BASE + 279) +#define __NR_waitid (__NR_SYSCALL_BASE + 280) +#define __NR_socket (__NR_SYSCALL_BASE + 281) +#define __NR_bind (__NR_SYSCALL_BASE + 282) +#define __NR_connect (__NR_SYSCALL_BASE + 283) +#define __NR_listen (__NR_SYSCALL_BASE + 284) +#define __NR_accept (__NR_SYSCALL_BASE + 285) +#define __NR_getsockname (__NR_SYSCALL_BASE + 286) +#define __NR_getpeername (__NR_SYSCALL_BASE + 287) +#define __NR_socketpair (__NR_SYSCALL_BASE + 288) +#define __NR_send (__NR_SYSCALL_BASE + 289) +#define __NR_sendto (__NR_SYSCALL_BASE + 290) +#define __NR_recv (__NR_SYSCALL_BASE + 291) +#define __NR_recvfrom (__NR_SYSCALL_BASE + 292) +#define __NR_shutdown (__NR_SYSCALL_BASE + 293) +#define __NR_setsockopt (__NR_SYSCALL_BASE + 294) +#define __NR_getsockopt (__NR_SYSCALL_BASE + 295) +#define __NR_sendmsg (__NR_SYSCALL_BASE + 296) +#define __NR_recvmsg (__NR_SYSCALL_BASE + 297) +#define __NR_semop (__NR_SYSCALL_BASE + 298) +#define __NR_semget (__NR_SYSCALL_BASE + 299) +#define __NR_semctl (__NR_SYSCALL_BASE + 300) +#define __NR_msgsnd (__NR_SYSCALL_BASE + 301) +#define __NR_msgrcv (__NR_SYSCALL_BASE + 302) +#define __NR_msgget (__NR_SYSCALL_BASE + 303) +#define __NR_msgctl (__NR_SYSCALL_BASE + 304) +#define __NR_shmat (__NR_SYSCALL_BASE + 305) +#define __NR_shmdt (__NR_SYSCALL_BASE + 306) +#define __NR_shmget (__NR_SYSCALL_BASE + 307) +#define __NR_shmctl (__NR_SYSCALL_BASE + 308) +#define __NR_add_key (__NR_SYSCALL_BASE + 309) +#define __NR_request_key (__NR_SYSCALL_BASE + 310) +#define __NR_keyctl (__NR_SYSCALL_BASE + 311) +#define __NR_semtimedop (__NR_SYSCALL_BASE + 312) +#define __NR_vserver (__NR_SYSCALL_BASE + 313) +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#define __NR_inotify_init (__NR_SYSCALL_BASE + 316) +#define __NR_inotify_add_watch (__NR_SYSCALL_BASE + 317) +#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE + 318) +#define __NR_mbind (__NR_SYSCALL_BASE + 319) +#define __NR_get_mempolicy (__NR_SYSCALL_BASE + 320) +#define __NR_set_mempolicy (__NR_SYSCALL_BASE + 321) +#define __NR_openat (__NR_SYSCALL_BASE + 322) +#define __NR_mkdirat (__NR_SYSCALL_BASE + 323) +#define __NR_mknodat (__NR_SYSCALL_BASE + 324) +#define __NR_fchownat (__NR_SYSCALL_BASE + 325) +#define __NR_futimesat (__NR_SYSCALL_BASE + 326) +#define __NR_fstatat64 (__NR_SYSCALL_BASE + 327) +#define __NR_unlinkat (__NR_SYSCALL_BASE + 328) +#define __NR_renameat (__NR_SYSCALL_BASE + 329) +#define __NR_linkat (__NR_SYSCALL_BASE + 330) +#define __NR_symlinkat (__NR_SYSCALL_BASE + 331) +#define __NR_readlinkat (__NR_SYSCALL_BASE + 332) +#define __NR_fchmodat (__NR_SYSCALL_BASE + 333) +#define __NR_faccessat (__NR_SYSCALL_BASE + 334) +#define __NR_pselect6 (__NR_SYSCALL_BASE + 335) +#define __NR_ppoll (__NR_SYSCALL_BASE + 336) +#define __NR_unshare (__NR_SYSCALL_BASE + 337) +#define __NR_set_robust_list (__NR_SYSCALL_BASE + 338) +#define __NR_get_robust_list (__NR_SYSCALL_BASE + 339) +#define __NR_splice (__NR_SYSCALL_BASE + 340) +#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE + 341) +#define __NR_tee (__NR_SYSCALL_BASE + 342) +#define __NR_vmsplice (__NR_SYSCALL_BASE + 343) +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) 
+#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#define __NR_epoll_pwait (__NR_SYSCALL_BASE + 346) +#define __NR_kexec_load (__NR_SYSCALL_BASE + 347) +#define __NR_utimensat (__NR_SYSCALL_BASE + 348) +#define __NR_signalfd (__NR_SYSCALL_BASE + 349) +#define __NR_timerfd_create (__NR_SYSCALL_BASE + 350) +#define __NR_eventfd (__NR_SYSCALL_BASE + 351) +#define __NR_fallocate (__NR_SYSCALL_BASE + 352) +#define __NR_timerfd_settime (__NR_SYSCALL_BASE + 353) +#define __NR_timerfd_gettime (__NR_SYSCALL_BASE + 354) +#define __NR_signalfd4 (__NR_SYSCALL_BASE + 355) +#define __NR_eventfd2 (__NR_SYSCALL_BASE + 356) +#define __NR_epoll_create1 (__NR_SYSCALL_BASE + 357) +#define __NR_dup3 (__NR_SYSCALL_BASE + 358) +#define __NR_pipe2 (__NR_SYSCALL_BASE + 359) +#define __NR_inotify_init1 (__NR_SYSCALL_BASE + 360) +#define __NR_preadv (__NR_SYSCALL_BASE + 361) +#define __NR_pwritev (__NR_SYSCALL_BASE + 362) +#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE + 363) +#define __NR_perf_event_open (__NR_SYSCALL_BASE + 364) +#define __NR_recvmmsg (__NR_SYSCALL_BASE + 365) +#define __NR_accept4 (__NR_SYSCALL_BASE + 366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE + 367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE + 368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE + 369) +#define __NR_name_to_handle_at (__NR_SYSCALL_BASE + 370) +#define __NR_open_by_handle_at (__NR_SYSCALL_BASE + 371) +#define __NR_clock_adjtime (__NR_SYSCALL_BASE + 372) +#define __NR_syncfs (__NR_SYSCALL_BASE + 373) +#define __NR_sendmmsg (__NR_SYSCALL_BASE + 374) +#define __NR_setns (__NR_SYSCALL_BASE + 375) +#define __NR_process_vm_readv (__NR_SYSCALL_BASE + 376) +#define __NR_process_vm_writev (__NR_SYSCALL_BASE + 377) +#define __NR_kcmp (__NR_SYSCALL_BASE + 378) +#define __NR_finit_module (__NR_SYSCALL_BASE + 379) +#define __NR_sched_setattr (__NR_SYSCALL_BASE + 380) +#define __NR_sched_getattr (__NR_SYSCALL_BASE + 381) +#define __NR_renameat2 (__NR_SYSCALL_BASE + 382) +#define __NR_seccomp (__NR_SYSCALL_BASE + 383) +#define __NR_getrandom (__NR_SYSCALL_BASE + 384) +#define __NR_memfd_create (__NR_SYSCALL_BASE + 385) +#define __NR_bpf (__NR_SYSCALL_BASE + 386) +#define __NR_execveat (__NR_SYSCALL_BASE + 387) +#define __NR_userfaultfd (__NR_SYSCALL_BASE + 388) +#define __NR_membarrier (__NR_SYSCALL_BASE + 389) +#define __NR_mlock2 (__NR_SYSCALL_BASE + 390) +#define __NR_copy_file_range (__NR_SYSCALL_BASE + 391) +#define __NR_preadv2 (__NR_SYSCALL_BASE + 392) +#define __NR_pwritev2 (__NR_SYSCALL_BASE + 393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE + 394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE + 395) +#define __NR_pkey_free (__NR_SYSCALL_BASE + 396) + +#endif /* _ASM_ARM_UNISTD_COMMON_H */ diff --git a/linux-headers/asm-arm/unistd-eabi.h b/linux-headers/asm-arm/unistd-eabi.h new file mode 100644 index 0000000000..266f1fcdfb --- /dev/null +++ b/linux-headers/asm-arm/unistd-eabi.h @@ -0,0 +1,5 @@ +#ifndef _ASM_ARM_UNISTD_EABI_H +#define _ASM_ARM_UNISTD_EABI_H 1 + + +#endif /* _ASM_ARM_UNISTD_EABI_H */ diff --git a/linux-headers/asm-arm/unistd-oabi.h b/linux-headers/asm-arm/unistd-oabi.h new file mode 100644 index 0000000000..47d9afb96d --- /dev/null +++ b/linux-headers/asm-arm/unistd-oabi.h @@ -0,0 +1,17 @@ +#ifndef _ASM_ARM_UNISTD_OABI_H +#define _ASM_ARM_UNISTD_OABI_H 1 + +#define __NR_time (__NR_SYSCALL_BASE + 13) +#define __NR_umount (__NR_SYSCALL_BASE + 22) +#define __NR_stime (__NR_SYSCALL_BASE + 25) +#define __NR_alarm (__NR_SYSCALL_BASE + 27) +#define __NR_utime (__NR_SYSCALL_BASE + 30) +#define __NR_getrlimit 
(__NR_SYSCALL_BASE + 76) +#define __NR_select (__NR_SYSCALL_BASE + 82) +#define __NR_readdir (__NR_SYSCALL_BASE + 89) +#define __NR_mmap (__NR_SYSCALL_BASE + 90) +#define __NR_socketcall (__NR_SYSCALL_BASE + 102) +#define __NR_syscall (__NR_SYSCALL_BASE + 113) +#define __NR_ipc (__NR_SYSCALL_BASE + 117) + +#endif /* _ASM_ARM_UNISTD_OABI_H */ diff --git a/linux-headers/asm-arm/unistd.h b/linux-headers/asm-arm/unistd.h index ceb5450c81..155571b874 100644 --- a/linux-headers/asm-arm/unistd.h +++ b/linux-headers/asm-arm/unistd.h @@ -17,409 +17,14 @@ #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 +#include <asm/unistd-eabi.h> #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE +#include <asm/unistd-oabi.h> #endif -/* - * This file contains the system call numbers. - */ - -#define __NR_restart_syscall (__NR_SYSCALL_BASE+ 0) -#define __NR_exit (__NR_SYSCALL_BASE+ 1) -#define __NR_fork (__NR_SYSCALL_BASE+ 2) -#define __NR_read (__NR_SYSCALL_BASE+ 3) -#define __NR_write (__NR_SYSCALL_BASE+ 4) -#define __NR_open (__NR_SYSCALL_BASE+ 5) -#define __NR_close (__NR_SYSCALL_BASE+ 6) - /* 7 was sys_waitpid */ -#define __NR_creat (__NR_SYSCALL_BASE+ 8) -#define __NR_link (__NR_SYSCALL_BASE+ 9) -#define __NR_unlink (__NR_SYSCALL_BASE+ 10) -#define __NR_execve (__NR_SYSCALL_BASE+ 11) -#define __NR_chdir (__NR_SYSCALL_BASE+ 12) -#define __NR_time (__NR_SYSCALL_BASE+ 13) -#define __NR_mknod (__NR_SYSCALL_BASE+ 14) -#define __NR_chmod (__NR_SYSCALL_BASE+ 15) -#define __NR_lchown (__NR_SYSCALL_BASE+ 16) - /* 17 was sys_break */ - /* 18 was sys_stat */ -#define __NR_lseek (__NR_SYSCALL_BASE+ 19) -#define __NR_getpid (__NR_SYSCALL_BASE+ 20) -#define __NR_mount (__NR_SYSCALL_BASE+ 21) -#define __NR_umount (__NR_SYSCALL_BASE+ 22) -#define __NR_setuid (__NR_SYSCALL_BASE+ 23) -#define __NR_getuid (__NR_SYSCALL_BASE+ 24) -#define __NR_stime (__NR_SYSCALL_BASE+ 25) -#define __NR_ptrace (__NR_SYSCALL_BASE+ 26) -#define __NR_alarm (__NR_SYSCALL_BASE+ 27) - /* 28 was sys_fstat */ -#define __NR_pause (__NR_SYSCALL_BASE+ 29) -#define __NR_utime (__NR_SYSCALL_BASE+ 30) - /* 31 was sys_stty */ - /* 32 was sys_gtty */ -#define __NR_access (__NR_SYSCALL_BASE+ 33) -#define __NR_nice (__NR_SYSCALL_BASE+ 34) - /* 35 was sys_ftime */ -#define __NR_sync (__NR_SYSCALL_BASE+ 36) -#define __NR_kill (__NR_SYSCALL_BASE+ 37) -#define __NR_rename (__NR_SYSCALL_BASE+ 38) -#define __NR_mkdir (__NR_SYSCALL_BASE+ 39) -#define __NR_rmdir (__NR_SYSCALL_BASE+ 40) -#define __NR_dup (__NR_SYSCALL_BASE+ 41) -#define __NR_pipe (__NR_SYSCALL_BASE+ 42) -#define __NR_times (__NR_SYSCALL_BASE+ 43) - /* 44 was sys_prof */ -#define __NR_brk (__NR_SYSCALL_BASE+ 45) -#define __NR_setgid (__NR_SYSCALL_BASE+ 46) -#define __NR_getgid (__NR_SYSCALL_BASE+ 47) - /* 48 was sys_signal */ -#define __NR_geteuid (__NR_SYSCALL_BASE+ 49) -#define __NR_getegid (__NR_SYSCALL_BASE+ 50) -#define __NR_acct (__NR_SYSCALL_BASE+ 51) -#define __NR_umount2 (__NR_SYSCALL_BASE+ 52) - /* 53 was sys_lock */ -#define __NR_ioctl (__NR_SYSCALL_BASE+ 54) -#define __NR_fcntl (__NR_SYSCALL_BASE+ 55) - /* 56 was sys_mpx */ -#define __NR_setpgid (__NR_SYSCALL_BASE+ 57) - /* 58 was sys_ulimit */ - /* 59 was sys_olduname */ -#define __NR_umask (__NR_SYSCALL_BASE+ 60) -#define __NR_chroot (__NR_SYSCALL_BASE+ 61) -#define __NR_ustat (__NR_SYSCALL_BASE+ 62) -#define __NR_dup2 (__NR_SYSCALL_BASE+ 63) -#define __NR_getppid (__NR_SYSCALL_BASE+ 64) -#define __NR_getpgrp (__NR_SYSCALL_BASE+ 65) -#define __NR_setsid (__NR_SYSCALL_BASE+ 66) -#define __NR_sigaction 
(__NR_SYSCALL_BASE+ 67) - /* 68 was sys_sgetmask */ - /* 69 was sys_ssetmask */ -#define __NR_setreuid (__NR_SYSCALL_BASE+ 70) -#define __NR_setregid (__NR_SYSCALL_BASE+ 71) -#define __NR_sigsuspend (__NR_SYSCALL_BASE+ 72) -#define __NR_sigpending (__NR_SYSCALL_BASE+ 73) -#define __NR_sethostname (__NR_SYSCALL_BASE+ 74) -#define __NR_setrlimit (__NR_SYSCALL_BASE+ 75) -#define __NR_getrlimit (__NR_SYSCALL_BASE+ 76) /* Back compat 2GB limited rlimit */ -#define __NR_getrusage (__NR_SYSCALL_BASE+ 77) -#define __NR_gettimeofday (__NR_SYSCALL_BASE+ 78) -#define __NR_settimeofday (__NR_SYSCALL_BASE+ 79) -#define __NR_getgroups (__NR_SYSCALL_BASE+ 80) -#define __NR_setgroups (__NR_SYSCALL_BASE+ 81) -#define __NR_select (__NR_SYSCALL_BASE+ 82) -#define __NR_symlink (__NR_SYSCALL_BASE+ 83) - /* 84 was sys_lstat */ -#define __NR_readlink (__NR_SYSCALL_BASE+ 85) -#define __NR_uselib (__NR_SYSCALL_BASE+ 86) -#define __NR_swapon (__NR_SYSCALL_BASE+ 87) -#define __NR_reboot (__NR_SYSCALL_BASE+ 88) -#define __NR_readdir (__NR_SYSCALL_BASE+ 89) -#define __NR_mmap (__NR_SYSCALL_BASE+ 90) -#define __NR_munmap (__NR_SYSCALL_BASE+ 91) -#define __NR_truncate (__NR_SYSCALL_BASE+ 92) -#define __NR_ftruncate (__NR_SYSCALL_BASE+ 93) -#define __NR_fchmod (__NR_SYSCALL_BASE+ 94) -#define __NR_fchown (__NR_SYSCALL_BASE+ 95) -#define __NR_getpriority (__NR_SYSCALL_BASE+ 96) -#define __NR_setpriority (__NR_SYSCALL_BASE+ 97) - /* 98 was sys_profil */ -#define __NR_statfs (__NR_SYSCALL_BASE+ 99) -#define __NR_fstatfs (__NR_SYSCALL_BASE+100) - /* 101 was sys_ioperm */ -#define __NR_socketcall (__NR_SYSCALL_BASE+102) -#define __NR_syslog (__NR_SYSCALL_BASE+103) -#define __NR_setitimer (__NR_SYSCALL_BASE+104) -#define __NR_getitimer (__NR_SYSCALL_BASE+105) -#define __NR_stat (__NR_SYSCALL_BASE+106) -#define __NR_lstat (__NR_SYSCALL_BASE+107) -#define __NR_fstat (__NR_SYSCALL_BASE+108) - /* 109 was sys_uname */ - /* 110 was sys_iopl */ -#define __NR_vhangup (__NR_SYSCALL_BASE+111) - /* 112 was sys_idle */ -#define __NR_syscall (__NR_SYSCALL_BASE+113) /* syscall to call a syscall! 
*/ -#define __NR_wait4 (__NR_SYSCALL_BASE+114) -#define __NR_swapoff (__NR_SYSCALL_BASE+115) -#define __NR_sysinfo (__NR_SYSCALL_BASE+116) -#define __NR_ipc (__NR_SYSCALL_BASE+117) -#define __NR_fsync (__NR_SYSCALL_BASE+118) -#define __NR_sigreturn (__NR_SYSCALL_BASE+119) -#define __NR_clone (__NR_SYSCALL_BASE+120) -#define __NR_setdomainname (__NR_SYSCALL_BASE+121) -#define __NR_uname (__NR_SYSCALL_BASE+122) - /* 123 was sys_modify_ldt */ -#define __NR_adjtimex (__NR_SYSCALL_BASE+124) -#define __NR_mprotect (__NR_SYSCALL_BASE+125) -#define __NR_sigprocmask (__NR_SYSCALL_BASE+126) - /* 127 was sys_create_module */ -#define __NR_init_module (__NR_SYSCALL_BASE+128) -#define __NR_delete_module (__NR_SYSCALL_BASE+129) - /* 130 was sys_get_kernel_syms */ -#define __NR_quotactl (__NR_SYSCALL_BASE+131) -#define __NR_getpgid (__NR_SYSCALL_BASE+132) -#define __NR_fchdir (__NR_SYSCALL_BASE+133) -#define __NR_bdflush (__NR_SYSCALL_BASE+134) -#define __NR_sysfs (__NR_SYSCALL_BASE+135) -#define __NR_personality (__NR_SYSCALL_BASE+136) - /* 137 was sys_afs_syscall */ -#define __NR_setfsuid (__NR_SYSCALL_BASE+138) -#define __NR_setfsgid (__NR_SYSCALL_BASE+139) -#define __NR__llseek (__NR_SYSCALL_BASE+140) -#define __NR_getdents (__NR_SYSCALL_BASE+141) -#define __NR__newselect (__NR_SYSCALL_BASE+142) -#define __NR_flock (__NR_SYSCALL_BASE+143) -#define __NR_msync (__NR_SYSCALL_BASE+144) -#define __NR_readv (__NR_SYSCALL_BASE+145) -#define __NR_writev (__NR_SYSCALL_BASE+146) -#define __NR_getsid (__NR_SYSCALL_BASE+147) -#define __NR_fdatasync (__NR_SYSCALL_BASE+148) -#define __NR__sysctl (__NR_SYSCALL_BASE+149) -#define __NR_mlock (__NR_SYSCALL_BASE+150) -#define __NR_munlock (__NR_SYSCALL_BASE+151) -#define __NR_mlockall (__NR_SYSCALL_BASE+152) -#define __NR_munlockall (__NR_SYSCALL_BASE+153) -#define __NR_sched_setparam (__NR_SYSCALL_BASE+154) -#define __NR_sched_getparam (__NR_SYSCALL_BASE+155) -#define __NR_sched_setscheduler (__NR_SYSCALL_BASE+156) -#define __NR_sched_getscheduler (__NR_SYSCALL_BASE+157) -#define __NR_sched_yield (__NR_SYSCALL_BASE+158) -#define __NR_sched_get_priority_max (__NR_SYSCALL_BASE+159) -#define __NR_sched_get_priority_min (__NR_SYSCALL_BASE+160) -#define __NR_sched_rr_get_interval (__NR_SYSCALL_BASE+161) -#define __NR_nanosleep (__NR_SYSCALL_BASE+162) -#define __NR_mremap (__NR_SYSCALL_BASE+163) -#define __NR_setresuid (__NR_SYSCALL_BASE+164) -#define __NR_getresuid (__NR_SYSCALL_BASE+165) - /* 166 was sys_vm86 */ - /* 167 was sys_query_module */ -#define __NR_poll (__NR_SYSCALL_BASE+168) -#define __NR_nfsservctl (__NR_SYSCALL_BASE+169) -#define __NR_setresgid (__NR_SYSCALL_BASE+170) -#define __NR_getresgid (__NR_SYSCALL_BASE+171) -#define __NR_prctl (__NR_SYSCALL_BASE+172) -#define __NR_rt_sigreturn (__NR_SYSCALL_BASE+173) -#define __NR_rt_sigaction (__NR_SYSCALL_BASE+174) -#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE+175) -#define __NR_rt_sigpending (__NR_SYSCALL_BASE+176) -#define __NR_rt_sigtimedwait (__NR_SYSCALL_BASE+177) -#define __NR_rt_sigqueueinfo (__NR_SYSCALL_BASE+178) -#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE+179) -#define __NR_pread64 (__NR_SYSCALL_BASE+180) -#define __NR_pwrite64 (__NR_SYSCALL_BASE+181) -#define __NR_chown (__NR_SYSCALL_BASE+182) -#define __NR_getcwd (__NR_SYSCALL_BASE+183) -#define __NR_capget (__NR_SYSCALL_BASE+184) -#define __NR_capset (__NR_SYSCALL_BASE+185) -#define __NR_sigaltstack (__NR_SYSCALL_BASE+186) -#define __NR_sendfile (__NR_SYSCALL_BASE+187) - /* 188 reserved */ - /* 189 reserved */ -#define __NR_vfork 
(__NR_SYSCALL_BASE+190) -#define __NR_ugetrlimit (__NR_SYSCALL_BASE+191) /* SuS compliant getrlimit */ -#define __NR_mmap2 (__NR_SYSCALL_BASE+192) -#define __NR_truncate64 (__NR_SYSCALL_BASE+193) -#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194) -#define __NR_stat64 (__NR_SYSCALL_BASE+195) -#define __NR_lstat64 (__NR_SYSCALL_BASE+196) -#define __NR_fstat64 (__NR_SYSCALL_BASE+197) -#define __NR_lchown32 (__NR_SYSCALL_BASE+198) -#define __NR_getuid32 (__NR_SYSCALL_BASE+199) -#define __NR_getgid32 (__NR_SYSCALL_BASE+200) -#define __NR_geteuid32 (__NR_SYSCALL_BASE+201) -#define __NR_getegid32 (__NR_SYSCALL_BASE+202) -#define __NR_setreuid32 (__NR_SYSCALL_BASE+203) -#define __NR_setregid32 (__NR_SYSCALL_BASE+204) -#define __NR_getgroups32 (__NR_SYSCALL_BASE+205) -#define __NR_setgroups32 (__NR_SYSCALL_BASE+206) -#define __NR_fchown32 (__NR_SYSCALL_BASE+207) -#define __NR_setresuid32 (__NR_SYSCALL_BASE+208) -#define __NR_getresuid32 (__NR_SYSCALL_BASE+209) -#define __NR_setresgid32 (__NR_SYSCALL_BASE+210) -#define __NR_getresgid32 (__NR_SYSCALL_BASE+211) -#define __NR_chown32 (__NR_SYSCALL_BASE+212) -#define __NR_setuid32 (__NR_SYSCALL_BASE+213) -#define __NR_setgid32 (__NR_SYSCALL_BASE+214) -#define __NR_setfsuid32 (__NR_SYSCALL_BASE+215) -#define __NR_setfsgid32 (__NR_SYSCALL_BASE+216) -#define __NR_getdents64 (__NR_SYSCALL_BASE+217) -#define __NR_pivot_root (__NR_SYSCALL_BASE+218) -#define __NR_mincore (__NR_SYSCALL_BASE+219) -#define __NR_madvise (__NR_SYSCALL_BASE+220) -#define __NR_fcntl64 (__NR_SYSCALL_BASE+221) - /* 222 for tux */ - /* 223 is unused */ -#define __NR_gettid (__NR_SYSCALL_BASE+224) -#define __NR_readahead (__NR_SYSCALL_BASE+225) -#define __NR_setxattr (__NR_SYSCALL_BASE+226) -#define __NR_lsetxattr (__NR_SYSCALL_BASE+227) -#define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -#define __NR_getxattr (__NR_SYSCALL_BASE+229) -#define __NR_lgetxattr (__NR_SYSCALL_BASE+230) -#define __NR_fgetxattr (__NR_SYSCALL_BASE+231) -#define __NR_listxattr (__NR_SYSCALL_BASE+232) -#define __NR_llistxattr (__NR_SYSCALL_BASE+233) -#define __NR_flistxattr (__NR_SYSCALL_BASE+234) -#define __NR_removexattr (__NR_SYSCALL_BASE+235) -#define __NR_lremovexattr (__NR_SYSCALL_BASE+236) -#define __NR_fremovexattr (__NR_SYSCALL_BASE+237) -#define __NR_tkill (__NR_SYSCALL_BASE+238) -#define __NR_sendfile64 (__NR_SYSCALL_BASE+239) -#define __NR_futex (__NR_SYSCALL_BASE+240) -#define __NR_sched_setaffinity (__NR_SYSCALL_BASE+241) -#define __NR_sched_getaffinity (__NR_SYSCALL_BASE+242) -#define __NR_io_setup (__NR_SYSCALL_BASE+243) -#define __NR_io_destroy (__NR_SYSCALL_BASE+244) -#define __NR_io_getevents (__NR_SYSCALL_BASE+245) -#define __NR_io_submit (__NR_SYSCALL_BASE+246) -#define __NR_io_cancel (__NR_SYSCALL_BASE+247) -#define __NR_exit_group (__NR_SYSCALL_BASE+248) -#define __NR_lookup_dcookie (__NR_SYSCALL_BASE+249) -#define __NR_epoll_create (__NR_SYSCALL_BASE+250) -#define __NR_epoll_ctl (__NR_SYSCALL_BASE+251) -#define __NR_epoll_wait (__NR_SYSCALL_BASE+252) -#define __NR_remap_file_pages (__NR_SYSCALL_BASE+253) - /* 254 for set_thread_area */ - /* 255 for get_thread_area */ -#define __NR_set_tid_address (__NR_SYSCALL_BASE+256) -#define __NR_timer_create (__NR_SYSCALL_BASE+257) -#define __NR_timer_settime (__NR_SYSCALL_BASE+258) -#define __NR_timer_gettime (__NR_SYSCALL_BASE+259) -#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+260) -#define __NR_timer_delete (__NR_SYSCALL_BASE+261) -#define __NR_clock_settime (__NR_SYSCALL_BASE+262) -#define __NR_clock_gettime (__NR_SYSCALL_BASE+263) -#define 
__NR_clock_getres (__NR_SYSCALL_BASE+264) -#define __NR_clock_nanosleep (__NR_SYSCALL_BASE+265) -#define __NR_statfs64 (__NR_SYSCALL_BASE+266) -#define __NR_fstatfs64 (__NR_SYSCALL_BASE+267) -#define __NR_tgkill (__NR_SYSCALL_BASE+268) -#define __NR_utimes (__NR_SYSCALL_BASE+269) -#define __NR_arm_fadvise64_64 (__NR_SYSCALL_BASE+270) -#define __NR_pciconfig_iobase (__NR_SYSCALL_BASE+271) -#define __NR_pciconfig_read (__NR_SYSCALL_BASE+272) -#define __NR_pciconfig_write (__NR_SYSCALL_BASE+273) -#define __NR_mq_open (__NR_SYSCALL_BASE+274) -#define __NR_mq_unlink (__NR_SYSCALL_BASE+275) -#define __NR_mq_timedsend (__NR_SYSCALL_BASE+276) -#define __NR_mq_timedreceive (__NR_SYSCALL_BASE+277) -#define __NR_mq_notify (__NR_SYSCALL_BASE+278) -#define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) -#define __NR_waitid (__NR_SYSCALL_BASE+280) -#define __NR_socket (__NR_SYSCALL_BASE+281) -#define __NR_bind (__NR_SYSCALL_BASE+282) -#define __NR_connect (__NR_SYSCALL_BASE+283) -#define __NR_listen (__NR_SYSCALL_BASE+284) -#define __NR_accept (__NR_SYSCALL_BASE+285) -#define __NR_getsockname (__NR_SYSCALL_BASE+286) -#define __NR_getpeername (__NR_SYSCALL_BASE+287) -#define __NR_socketpair (__NR_SYSCALL_BASE+288) -#define __NR_send (__NR_SYSCALL_BASE+289) -#define __NR_sendto (__NR_SYSCALL_BASE+290) -#define __NR_recv (__NR_SYSCALL_BASE+291) -#define __NR_recvfrom (__NR_SYSCALL_BASE+292) -#define __NR_shutdown (__NR_SYSCALL_BASE+293) -#define __NR_setsockopt (__NR_SYSCALL_BASE+294) -#define __NR_getsockopt (__NR_SYSCALL_BASE+295) -#define __NR_sendmsg (__NR_SYSCALL_BASE+296) -#define __NR_recvmsg (__NR_SYSCALL_BASE+297) -#define __NR_semop (__NR_SYSCALL_BASE+298) -#define __NR_semget (__NR_SYSCALL_BASE+299) -#define __NR_semctl (__NR_SYSCALL_BASE+300) -#define __NR_msgsnd (__NR_SYSCALL_BASE+301) -#define __NR_msgrcv (__NR_SYSCALL_BASE+302) -#define __NR_msgget (__NR_SYSCALL_BASE+303) -#define __NR_msgctl (__NR_SYSCALL_BASE+304) -#define __NR_shmat (__NR_SYSCALL_BASE+305) -#define __NR_shmdt (__NR_SYSCALL_BASE+306) -#define __NR_shmget (__NR_SYSCALL_BASE+307) -#define __NR_shmctl (__NR_SYSCALL_BASE+308) -#define __NR_add_key (__NR_SYSCALL_BASE+309) -#define __NR_request_key (__NR_SYSCALL_BASE+310) -#define __NR_keyctl (__NR_SYSCALL_BASE+311) -#define __NR_semtimedop (__NR_SYSCALL_BASE+312) -#define __NR_vserver (__NR_SYSCALL_BASE+313) -#define __NR_ioprio_set (__NR_SYSCALL_BASE+314) -#define __NR_ioprio_get (__NR_SYSCALL_BASE+315) -#define __NR_inotify_init (__NR_SYSCALL_BASE+316) -#define __NR_inotify_add_watch (__NR_SYSCALL_BASE+317) -#define __NR_inotify_rm_watch (__NR_SYSCALL_BASE+318) -#define __NR_mbind (__NR_SYSCALL_BASE+319) -#define __NR_get_mempolicy (__NR_SYSCALL_BASE+320) -#define __NR_set_mempolicy (__NR_SYSCALL_BASE+321) -#define __NR_openat (__NR_SYSCALL_BASE+322) -#define __NR_mkdirat (__NR_SYSCALL_BASE+323) -#define __NR_mknodat (__NR_SYSCALL_BASE+324) -#define __NR_fchownat (__NR_SYSCALL_BASE+325) -#define __NR_futimesat (__NR_SYSCALL_BASE+326) -#define __NR_fstatat64 (__NR_SYSCALL_BASE+327) -#define __NR_unlinkat (__NR_SYSCALL_BASE+328) -#define __NR_renameat (__NR_SYSCALL_BASE+329) -#define __NR_linkat (__NR_SYSCALL_BASE+330) -#define __NR_symlinkat (__NR_SYSCALL_BASE+331) -#define __NR_readlinkat (__NR_SYSCALL_BASE+332) -#define __NR_fchmodat (__NR_SYSCALL_BASE+333) -#define __NR_faccessat (__NR_SYSCALL_BASE+334) -#define __NR_pselect6 (__NR_SYSCALL_BASE+335) -#define __NR_ppoll (__NR_SYSCALL_BASE+336) -#define __NR_unshare (__NR_SYSCALL_BASE+337) -#define __NR_set_robust_list 
(__NR_SYSCALL_BASE+338) -#define __NR_get_robust_list (__NR_SYSCALL_BASE+339) -#define __NR_splice (__NR_SYSCALL_BASE+340) -#define __NR_arm_sync_file_range (__NR_SYSCALL_BASE+341) +#include <asm/unistd-common.h> #define __NR_sync_file_range2 __NR_arm_sync_file_range -#define __NR_tee (__NR_SYSCALL_BASE+342) -#define __NR_vmsplice (__NR_SYSCALL_BASE+343) -#define __NR_move_pages (__NR_SYSCALL_BASE+344) -#define __NR_getcpu (__NR_SYSCALL_BASE+345) -#define __NR_epoll_pwait (__NR_SYSCALL_BASE+346) -#define __NR_kexec_load (__NR_SYSCALL_BASE+347) -#define __NR_utimensat (__NR_SYSCALL_BASE+348) -#define __NR_signalfd (__NR_SYSCALL_BASE+349) -#define __NR_timerfd_create (__NR_SYSCALL_BASE+350) -#define __NR_eventfd (__NR_SYSCALL_BASE+351) -#define __NR_fallocate (__NR_SYSCALL_BASE+352) -#define __NR_timerfd_settime (__NR_SYSCALL_BASE+353) -#define __NR_timerfd_gettime (__NR_SYSCALL_BASE+354) -#define __NR_signalfd4 (__NR_SYSCALL_BASE+355) -#define __NR_eventfd2 (__NR_SYSCALL_BASE+356) -#define __NR_epoll_create1 (__NR_SYSCALL_BASE+357) -#define __NR_dup3 (__NR_SYSCALL_BASE+358) -#define __NR_pipe2 (__NR_SYSCALL_BASE+359) -#define __NR_inotify_init1 (__NR_SYSCALL_BASE+360) -#define __NR_preadv (__NR_SYSCALL_BASE+361) -#define __NR_pwritev (__NR_SYSCALL_BASE+362) -#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) -#define __NR_recvmmsg (__NR_SYSCALL_BASE+365) -#define __NR_accept4 (__NR_SYSCALL_BASE+366) -#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) -#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) -#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) -#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370) -#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371) -#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) -#define __NR_syncfs (__NR_SYSCALL_BASE+373) -#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) -#define __NR_setns (__NR_SYSCALL_BASE+375) -#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376) -#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377) -#define __NR_kcmp (__NR_SYSCALL_BASE+378) -#define __NR_finit_module (__NR_SYSCALL_BASE+379) -#define __NR_sched_setattr (__NR_SYSCALL_BASE+380) -#define __NR_sched_getattr (__NR_SYSCALL_BASE+381) -#define __NR_renameat2 (__NR_SYSCALL_BASE+382) -#define __NR_seccomp (__NR_SYSCALL_BASE+383) -#define __NR_getrandom (__NR_SYSCALL_BASE+384) -#define __NR_memfd_create (__NR_SYSCALL_BASE+385) -#define __NR_bpf (__NR_SYSCALL_BASE+386) -#define __NR_execveat (__NR_SYSCALL_BASE+387) -#define __NR_userfaultfd (__NR_SYSCALL_BASE+388) -#define __NR_membarrier (__NR_SYSCALL_BASE+389) -#define __NR_mlock2 (__NR_SYSCALL_BASE+390) -#define __NR_copy_file_range (__NR_SYSCALL_BASE+391) -#define __NR_preadv2 (__NR_SYSCALL_BASE+392) -#define __NR_pwritev2 (__NR_SYSCALL_BASE+393) /* * The following SWIs are ARM private. @@ -431,22 +36,4 @@ #define __ARM_NR_usr32 (__ARM_NR_BASE+4) #define __ARM_NR_set_tls (__ARM_NR_BASE+5) -/* - * The following syscalls are obsolete and no longer available for EABI. 
- */ -#if defined(__ARM_EABI__) -#undef __NR_time -#undef __NR_umount -#undef __NR_stime -#undef __NR_alarm -#undef __NR_utime -#undef __NR_getrlimit -#undef __NR_select -#undef __NR_readdir -#undef __NR_mmap -#undef __NR_socketcall -#undef __NR_syscall -#undef __NR_ipc -#endif - #endif /* __ASM_ARM_UNISTD_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index fd5a2761a5..651ec30040 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 #define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32 +#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \ + (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff) #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL 4 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ + (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff +#define VGIC_LEVEL_INFO_LINE_LEVEL 0 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 /* Device Control API on vcpu fd */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index c93cf35ce3..4edbe4bb0e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -413,6 +413,26 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* For KVM_PPC_CONFIGURE_V3_MMU */ +struct kvm_ppc_mmuv3_cfg { + __u64 flags; + __u64 process_table; /* second doubleword of partition table entry */ +}; + +/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */ +#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */ +#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. 
*/ + +/* For KVM_PPC_GET_RMMU_INFO */ +struct kvm_ppc_rmmu_info { + struct kvm_ppc_radix_geom { + __u8 page_shift; + __u8 level_bits[4]; + __u8 pad[3]; + } geometries[8]; + __u32 ap_encodings[8]; +}; + /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) @@ -573,6 +593,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ @@ -596,6 +620,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ @@ -608,5 +633,7 @@ struct kvm_get_htab_header { #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) #define KVM_XICS_MASKED (1ULL << 41) #define KVM_XICS_PENDING (1ULL << 42) +#define KVM_XICS_PRESENTED (1ULL << 43) +#define KVM_XICS_QUEUED (1ULL << 44) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-powerpc/unistd.h b/linux-headers/asm-powerpc/unistd.h index 1e66eba4c6..598043c7b6 100644 --- a/linux-headers/asm-powerpc/unistd.h +++ b/linux-headers/asm-powerpc/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index e41c5c1a28..3a5397988e 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -45,7 +45,18 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; +}; + +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index bb0ed71223..4e082a81b4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -651,6 +652,9 @@ struct kvm_enable_cap { }; /* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + struct kvm_ppc_pvinfo { /* out */ __u32 flags; @@ -682,7 +686,12 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; #define KVMIO 0xAE @@ -870,6 +879,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_INSTR0 130 #define KVM_CAP_MSI_DEVID 131 #define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef 
KVM_CAP_IRQ_ROUTING @@ -1186,6 +1199,13 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* Available with KVM_CAP_PPC_RTAS */ #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h index e61661edf3..15b24ff6cf 100644 --- a/linux-headers/linux/kvm_para.h +++ b/linux-headers/linux/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG #define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 @@ -23,6 +24,7 @@ #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 /* * hypercalls use architecture specific diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 19e8453249..2ed5dc3775 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -11,13 +11,18 @@ #include <linux/types.h> -#define UFFD_API ((__u64)0xAA) /* - * After implementing the respective features it will become: - * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ - * UFFD_FEATURE_EVENT_FORK) + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). */ -#define UFFD_API_FEATURES (0) +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_MADVDONTNEED | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -26,6 +31,9 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY) /* * Valid ioctl command number range with this API is from 0x00 to @@ -72,6 +80,21 @@ struct uffd_msg { } pagefault; struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } madv_dn; + + struct { /* unused reserved fields */ __u64 reserved1; __u64 reserved2; @@ -84,9 +107,9 @@ struct uffd_msg { * Start at 0x12 and not at 0 to be more strict against bugs. 
*/ #define UFFD_EVENT_PAGEFAULT 0x12 -#if 0 /* not available yet */ #define UFFD_EVENT_FORK 0x13 -#endif +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_MADVDONTNEED 0x15 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -104,11 +127,37 @@ struct uffdio_api { * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE * are to be considered implicitly always enabled in all kernels as * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). */ -#if 0 /* not available yet */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) -#endif +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_MADVDONTNEED (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) __u64 features; __u64 ioctls; diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 759b850a3e..531cb2eda9 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -203,6 +203,16 @@ struct vfio_device_info { }; #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" + /** * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, * struct vfio_region_info) diff --git a/linux-user/main.c b/linux-user/main.c index 4fd49ce6b6..10a3bb3a12 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -574,6 +574,7 @@ void cpu_loop(CPUARMState *env) switch(trapnr) { case EXCP_UDEF: case EXCP_NOCP: + case EXCP_INVSTATE: { TaskState *ts = cs->opaque; uint32_t opcode; @@ -1712,10 +1713,12 @@ void cpu_loop(CPUPPCState *env) * in syscalls. */ env->crf[0] &= ~0x1; + env->nip += 4; ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4], env->gpr[5], env->gpr[6], env->gpr[7], env->gpr[8], 0, 0); if (ret == -TARGET_ERESTARTSYS) { + env->nip -= 4; break; } if (ret == (target_ulong)(-TARGET_QEMU_ESIGRETURN)) { @@ -1723,7 +1726,6 @@ void cpu_loop(CPUPPCState *env) Avoid corrupting register state. 
*/ break; } - env->nip += 4; if (ret > (target_ulong)(-515)) { env->crf[0] |= 0x1; ret = -ret; diff --git a/linux-user/signal.c b/linux-user/signal.c index 8209539555..a67db04e1a 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -254,7 +254,7 @@ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) } #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ - !defined(TARGET_X86_64) && !defined(TARGET_NIOS2) + !defined(TARGET_NIOS2) /* Just set the guest's signal mask to the specified value; the * caller is assumed to have called block_signals() already. */ @@ -512,7 +512,7 @@ void signal_init(void) } } -#if !(defined(TARGET_X86_64) || defined(TARGET_UNICORE32)) +#ifndef TARGET_UNICORE32 /* Force a synchronously taken signal. The kernel force_sig() function * also forces the signal to "not blocked, not ignored", but for QEMU * that work is done in process_pending_signals(). @@ -819,9 +819,8 @@ int do_sigaction(int sig, const struct target_sigaction *act, return ret; } -#if defined(TARGET_I386) && TARGET_ABI_BITS == 32 - -/* from the Linux kernel */ +#if defined(TARGET_I386) +/* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */ struct target_fpreg { uint16_t significand[4]; @@ -835,58 +834,120 @@ struct target_fpxreg { }; struct target_xmmreg { - abi_ulong element[4]; + uint32_t element[4]; }; -struct target_fpstate { +struct target_fpstate_32 { /* Regular FPU environment */ - abi_ulong cw; - abi_ulong sw; - abi_ulong tag; - abi_ulong ipoff; - abi_ulong cssel; - abi_ulong dataoff; - abi_ulong datasel; - struct target_fpreg _st[8]; + uint32_t cw; + uint32_t sw; + uint32_t tag; + uint32_t ipoff; + uint32_t cssel; + uint32_t dataoff; + uint32_t datasel; + struct target_fpreg st[8]; uint16_t status; uint16_t magic; /* 0xffff = regular FPU data only */ /* FXSR FPU environment */ - abi_ulong _fxsr_env[6]; /* FXSR FPU env is ignored */ - abi_ulong mxcsr; - abi_ulong reserved; - struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ - struct target_xmmreg _xmm[8]; - abi_ulong padding[56]; + uint32_t _fxsr_env[6]; /* FXSR FPU env is ignored */ + uint32_t mxcsr; + uint32_t reserved; + struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct target_xmmreg xmm[8]; + uint32_t padding[56]; }; -#define X86_FXSR_MAGIC 0x0000 +struct target_fpstate_64 { + /* FXSAVE format */ + uint16_t cw; + uint16_t sw; + uint16_t twd; + uint16_t fop; + uint64_t rip; + uint64_t rdp; + uint32_t mxcsr; + uint32_t mxcsr_mask; + uint32_t st_space[32]; + uint32_t xmm_space[64]; + uint32_t reserved[24]; +}; -struct target_sigcontext { +#ifndef TARGET_X86_64 +# define target_fpstate target_fpstate_32 +#else +# define target_fpstate target_fpstate_64 +#endif + +struct target_sigcontext_32 { uint16_t gs, __gsh; uint16_t fs, __fsh; uint16_t es, __esh; uint16_t ds, __dsh; - abi_ulong edi; - abi_ulong esi; - abi_ulong ebp; - abi_ulong esp; - abi_ulong ebx; - abi_ulong edx; - abi_ulong ecx; - abi_ulong eax; - abi_ulong trapno; - abi_ulong err; - abi_ulong eip; + uint32_t edi; + uint32_t esi; + uint32_t ebp; + uint32_t esp; + uint32_t ebx; + uint32_t edx; + uint32_t ecx; + uint32_t eax; + uint32_t trapno; + uint32_t err; + uint32_t eip; uint16_t cs, __csh; - abi_ulong eflags; - abi_ulong esp_at_signal; + uint32_t eflags; + uint32_t esp_at_signal; uint16_t ss, __ssh; - abi_ulong fpstate; /* pointer */ - abi_ulong oldmask; - abi_ulong cr2; + uint32_t fpstate; /* pointer */ + uint32_t oldmask; + uint32_t cr2; +}; + +struct target_sigcontext_64 { + uint64_t 
r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rbx; + uint64_t rdx; + uint64_t rax; + uint64_t rcx; + uint64_t rsp; + uint64_t rip; + + uint64_t eflags; + + uint16_t cs; + uint16_t gs; + uint16_t fs; + uint16_t ss; + + uint64_t err; + uint64_t trapno; + uint64_t oldmask; + uint64_t cr2; + + uint64_t fpstate; /* pointer */ + uint64_t padding[8]; }; +#ifndef TARGET_X86_64 +# define target_sigcontext target_sigcontext_32 +#else +# define target_sigcontext target_sigcontext_64 +#endif + +/* see Linux/include/uapi/asm-generic/ucontext.h */ struct target_ucontext { abi_ulong tuc_flags; abi_ulong tuc_link; @@ -895,8 +956,8 @@ struct target_ucontext { target_sigset_t tuc_sigmask; /* mask last for extensibility */ }; -struct sigframe -{ +#ifndef TARGET_X86_64 +struct sigframe { abi_ulong pretcode; int sig; struct target_sigcontext sc; @@ -905,8 +966,7 @@ struct sigframe char retcode[8]; }; -struct rt_sigframe -{ +struct rt_sigframe { abi_ulong pretcode; int sig; abi_ulong pinfo; @@ -917,6 +977,17 @@ struct rt_sigframe char retcode[8]; }; +#else + +struct rt_sigframe { + abi_ulong pretcode; + struct target_ucontext uc; + struct target_siginfo info; + struct target_fpstate fpstate; +}; + +#endif + /* * Set up a signal frame. */ @@ -927,6 +998,7 @@ static void setup_sigcontext(struct target_sigcontext *sc, abi_ulong fpstate_addr) { CPUState *cs = CPU(x86_env_get_cpu(env)); +#ifndef TARGET_X86_64 uint16_t magic; /* already locked in setup_frame() */ @@ -959,6 +1031,44 @@ static void setup_sigcontext(struct target_sigcontext *sc, /* non-iBCS2 extensions.. */ __put_user(mask, &sc->oldmask); __put_user(env->cr[2], &sc->cr2); +#else + __put_user(env->regs[R_EDI], &sc->rdi); + __put_user(env->regs[R_ESI], &sc->rsi); + __put_user(env->regs[R_EBP], &sc->rbp); + __put_user(env->regs[R_ESP], &sc->rsp); + __put_user(env->regs[R_EBX], &sc->rbx); + __put_user(env->regs[R_EDX], &sc->rdx); + __put_user(env->regs[R_ECX], &sc->rcx); + __put_user(env->regs[R_EAX], &sc->rax); + + __put_user(env->regs[8], &sc->r8); + __put_user(env->regs[9], &sc->r9); + __put_user(env->regs[10], &sc->r10); + __put_user(env->regs[11], &sc->r11); + __put_user(env->regs[12], &sc->r12); + __put_user(env->regs[13], &sc->r13); + __put_user(env->regs[14], &sc->r14); + __put_user(env->regs[15], &sc->r15); + + __put_user(cs->exception_index, &sc->trapno); + __put_user(env->error_code, &sc->err); + __put_user(env->eip, &sc->rip); + + __put_user(env->eflags, &sc->eflags); + __put_user(env->segs[R_CS].selector, &sc->cs); + __put_user((uint16_t)0, &sc->gs); + __put_user((uint16_t)0, &sc->fs); + __put_user(env->segs[R_SS].selector, &sc->ss); + + __put_user(mask, &sc->oldmask); + __put_user(env->cr[2], &sc->cr2); + + /* fpstate_addr must be 16 byte aligned for fxsave */ + assert(!(fpstate_addr & 0xf)); + + cpu_x86_fxsave(env, fpstate_addr); + __put_user(fpstate_addr, &sc->fpstate); +#endif } /* @@ -972,23 +1082,34 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) /* Default to using normal stack */ esp = env->regs[R_ESP]; +#ifdef TARGET_X86_64 + esp -= 128; /* this is the redzone */ +#endif + /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa_flags & TARGET_SA_ONSTACK) { if (sas_ss_flags(esp) == 0) { esp = target_sigaltstack_used.ss_sp + target_sigaltstack_used.ss_size; } } else { - +#ifndef TARGET_X86_64 /* This is the legacy signal stack switching. 
*/ if ((env->segs[R_SS].selector & 0xffff) != __USER_DS && !(ka->sa_flags & TARGET_SA_RESTORER) && ka->sa_restorer) { esp = (unsigned long) ka->sa_restorer; } +#endif } + +#ifndef TARGET_X86_64 return (esp - frame_size) & -8ul; +#else + return ((esp - frame_size) & (~15ul)) - 8; +#endif } +#ifndef TARGET_X86_64 /* compare linux/arch/i386/kernel/signal.c:setup_frame() */ static void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) @@ -1029,7 +1150,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, __put_user(val16, (uint16_t *)(frame->retcode+6)); } - /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; @@ -1047,13 +1167,17 @@ static void setup_frame(int sig, struct target_sigaction *ka, give_sigsegv: force_sigsegv(sig); } +#endif -/* compare linux/arch/i386/kernel/signal.c:setup_rt_frame() */ +/* compare linux/arch/x86/kernel/signal.c:setup_rt_frame() */ static void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr, addr; + abi_ulong frame_addr; +#ifndef TARGET_X86_64 + abi_ulong addr; +#endif struct rt_sigframe *frame; int i; @@ -1063,12 +1187,17 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; + /* These fields are only in rt_sigframe on 32 bit */ +#ifndef TARGET_X86_64 __put_user(sig, &frame->sig); addr = frame_addr + offsetof(struct rt_sigframe, info); __put_user(addr, &frame->pinfo); addr = frame_addr + offsetof(struct rt_sigframe, uc); __put_user(addr, &frame->puc); - tswap_siginfo(&frame->info, info); +#endif + if (ka->sa_flags & TARGET_SA_SIGINFO) { + tswap_siginfo(&frame->info, info); + } /* Create the ucontext. */ __put_user(0, &frame->uc.tuc_flags); @@ -1087,6 +1216,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. 
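
The 64-bit branch of get_sigframe() above encodes two pieces of the x86-64 ABI: step over the 128-byte red zone below the interrupted stack pointer, and hand the handler a stack where RSP is congruent to 8 modulo 16, as if a CALL had just pushed a return address (the pretcode slot plays that role). A tiny illustration of the arithmetic, with an invented helper name and made-up input values:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper mirroring the placement rules sketched above. */
    static uint64_t place_frame(uint64_t rsp, uint64_t frame_size)
    {
        rsp -= 128;                        /* step over the ABI red zone */
        rsp = (rsp - frame_size) & ~15ULL; /* 16-byte align the frame */
        return rsp - 8;                    /* leave rsp % 16 == 8 at entry */
    }

    int main(void)
    {
        uint64_t addr = place_frame(0x7ffffffff000ULL, 0x5c0 /* made-up size */);

        assert(addr % 16 == 8);            /* the "CALL just happened" rule */
        printf("frame would start at %#llx\n", (unsigned long long)addr);
        return 0;
    }
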
*/ +#ifndef TARGET_X86_64 if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { @@ -1099,15 +1229,31 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, val16 = 0x80cd; __put_user(val16, (uint16_t *)(frame->retcode+5)); } +#else + /* XXX: Would be slightly better to return -EFAULT here if test fails + assert(ka->sa_flags & TARGET_SA_RESTORER); */ + __put_user(ka->sa_restorer, &frame->pretcode); +#endif /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; +#ifndef TARGET_X86_64 + env->regs[R_EAX] = sig; + env->regs[R_EDX] = (unsigned long)&frame->info; + env->regs[R_ECX] = (unsigned long)&frame->uc; +#else + env->regs[R_EAX] = 0; + env->regs[R_EDI] = sig; + env->regs[R_ESI] = (unsigned long)&frame->info; + env->regs[R_EDX] = (unsigned long)&frame->uc; +#endif + cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); - cpu_x86_load_seg(env, R_SS, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); + cpu_x86_load_seg(env, R_SS, __USER_DS); env->eflags &= ~TF_MASK; unlock_user_struct(frame, frame_addr, 1); @@ -1125,6 +1271,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) abi_ulong fpstate_addr; unsigned int tmpflags; +#ifndef TARGET_X86_64 cpu_x86_load_seg(env, R_GS, tswap16(sc->gs)); cpu_x86_load_seg(env, R_FS, tswap16(sc->fs)); cpu_x86_load_seg(env, R_ES, tswap16(sc->es)); @@ -1138,7 +1285,29 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) env->regs[R_EDX] = tswapl(sc->edx); env->regs[R_ECX] = tswapl(sc->ecx); env->regs[R_EAX] = tswapl(sc->eax); + env->eip = tswapl(sc->eip); +#else + env->regs[8] = tswapl(sc->r8); + env->regs[9] = tswapl(sc->r9); + env->regs[10] = tswapl(sc->r10); + env->regs[11] = tswapl(sc->r11); + env->regs[12] = tswapl(sc->r12); + env->regs[13] = tswapl(sc->r13); + env->regs[14] = tswapl(sc->r14); + env->regs[15] = tswapl(sc->r15); + + env->regs[R_EDI] = tswapl(sc->rdi); + env->regs[R_ESI] = tswapl(sc->rsi); + env->regs[R_EBP] = tswapl(sc->rbp); + env->regs[R_EBX] = tswapl(sc->rbx); + env->regs[R_EDX] = tswapl(sc->rdx); + env->regs[R_EAX] = tswapl(sc->rax); + env->regs[R_ECX] = tswapl(sc->rcx); + env->regs[R_ESP] = tswapl(sc->rsp); + + env->eip = tswapl(sc->rip); +#endif cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3); cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3); @@ -1152,7 +1321,11 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) if (!access_ok(VERIFY_READ, fpstate_addr, sizeof(struct target_fpstate))) goto badframe; +#ifndef TARGET_X86_64 cpu_x86_frstor(env, fpstate_addr, 1); +#else + cpu_x86_fxrstor(env, fpstate_addr); +#endif } return err; @@ -1160,6 +1333,8 @@ badframe: return 1; } +/* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ +#ifndef TARGET_X86_64 long do_sigreturn(CPUX86State *env) { struct sigframe *frame; @@ -1191,6 +1366,7 @@ badframe: force_sig(TARGET_SIGSEGV); return -TARGET_QEMU_ESIGRETURN; } +#endif long do_rt_sigreturn(CPUX86State *env) { @@ -1198,7 +1374,7 @@ long do_rt_sigreturn(CPUX86State *env) struct rt_sigframe *frame; sigset_t set; - frame_addr = env->regs[R_ESP] - 4; + frame_addr = env->regs[R_ESP] - sizeof(abi_ulong); trace_user_do_rt_sigreturn(env, frame_addr); if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; @@ -5500,6 +5676,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc, CPUM68KState *env) { target_greg_t *gregs = uc->tuc_mcontext.gregs; + uint32_t sr = 
cpu_m68k_get_ccr(env); __put_user(TARGET_MCONTEXT_VERSION, &uc->tuc_mcontext.version); __put_user(env->dregs[0], &gregs[0]); @@ -5519,7 +5696,7 @@ static inline int target_rt_setup_ucontext(struct target_ucontext *uc, __put_user(env->aregs[6], &gregs[14]); __put_user(env->aregs[7], &gregs[15]); __put_user(env->pc, &gregs[16]); - __put_user(env->sr, &gregs[17]); + __put_user(sr, &gregs[17]); return 0; } @@ -5553,7 +5730,7 @@ static inline int target_rt_restore_ucontext(CPUM68KState *env, __get_user(env->aregs[7], &gregs[15]); __get_user(env->pc, &gregs[16]); __get_user(temp, &gregs[17]); - env->sr = (env->sr & 0xff00) | (temp & 0xff); + cpu_m68k_set_ccr(env, temp); return 0; @@ -5674,14 +5851,13 @@ long do_rt_sigreturn(CPUM68KState *env) { struct target_rt_sigframe *frame; abi_ulong frame_addr = env->aregs[7] - 4; - target_sigset_t target_set; sigset_t set; trace_user_do_rt_sigreturn(env, frame_addr); if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) goto badframe; - target_to_host_sigset_internal(&set, &target_set); + target_to_host_sigset(&set, &frame->uc.tuc_sigmask); set_sigmask(&set); /* restore registers */ @@ -6418,7 +6594,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig, #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) \ || defined(TARGET_PPC64) || defined(TARGET_HPPA) \ - || defined(TARGET_NIOS2) + || defined(TARGET_NIOS2) || defined(TARGET_X86_64) /* These targets do not have traditional signals. */ setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); #else diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f569f827fc..cec8428589 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -57,6 +57,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include <netinet/tcp.h> #include <linux/wireless.h> #include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/errqueue.h> #include "qemu-common.h" #ifdef CONFIG_TIMERFD #include <sys/timerfd.h> @@ -1634,6 +1636,11 @@ static inline abi_long host_to_target_sockaddr(abi_ulong target_addr, struct sockaddr_ll *target_ll = (struct sockaddr_ll *)target_saddr; target_ll->sll_ifindex = tswap32(target_ll->sll_ifindex); target_ll->sll_hatype = tswap16(target_ll->sll_hatype); + } else if (addr->sa_family == AF_INET6 && + len >= sizeof(struct target_sockaddr_in6)) { + struct target_sockaddr_in6 *target_in6 = + (struct target_sockaddr_in6 *)target_saddr; + target_in6->sin6_scope_id = tswap16(target_in6->sin6_scope_id); } unlock_user(target_saddr, target_addr, len); @@ -1839,6 +1846,78 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, } break; + case SOL_IP: + switch (cmsg->cmsg_type) { + case IP_TTL: + { + uint32_t *v = (uint32_t *)data; + uint32_t *t_int = (uint32_t *)target_data; + + __put_user(*v, t_int); + break; + } + case IP_RECVERR: + { + struct errhdr_t { + struct sock_extended_err ee; + struct sockaddr_in offender; + }; + struct errhdr_t *errh = (struct errhdr_t *)data; + struct errhdr_t *target_errh = + (struct errhdr_t *)target_data; + + __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); + __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); + __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); + __put_user(errh->ee.ee_code, &target_errh->ee.ee_code); + __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad); + __put_user(errh->ee.ee_info, &target_errh->ee.ee_info); + __put_user(errh->ee.ee_data, &target_errh->ee.ee_data); + 
host_to_target_sockaddr((unsigned long) &target_errh->offender, + (void *) &errh->offender, sizeof(errh->offender)); + break; + } + default: + goto unimplemented; + } + break; + + case SOL_IPV6: + switch (cmsg->cmsg_type) { + case IPV6_HOPLIMIT: + { + uint32_t *v = (uint32_t *)data; + uint32_t *t_int = (uint32_t *)target_data; + + __put_user(*v, t_int); + break; + } + case IPV6_RECVERR: + { + struct errhdr6_t { + struct sock_extended_err ee; + struct sockaddr_in6 offender; + }; + struct errhdr6_t *errh = (struct errhdr6_t *)data; + struct errhdr6_t *target_errh = + (struct errhdr6_t *)target_data; + + __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); + __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); + __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); + __put_user(errh->ee.ee_code, &target_errh->ee.ee_code); + __put_user(errh->ee.ee_pad, &target_errh->ee.ee_pad); + __put_user(errh->ee.ee_info, &target_errh->ee.ee_info); + __put_user(errh->ee.ee_data, &target_errh->ee.ee_data); + host_to_target_sockaddr((unsigned long) &target_errh->offender, + (void *) &errh->offender, sizeof(errh->offender)); + break; + } + default: + goto unimplemented; + } + break; + default: unimplemented: gemu_log("Unsupported ancillary data: %d/%d\n", @@ -2768,6 +2847,7 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case IP_PKTINFO: case IP_MTU_DISCOVER: case IP_RECVERR: + case IP_RECVTTL: case IP_RECVTOS: #ifdef IP_FREEBIND case IP_FREEBIND: @@ -2817,6 +2897,11 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case IPV6_MTU: case IPV6_V6ONLY: case IPV6_RECVPKTINFO: + case IPV6_UNICAST_HOPS: + case IPV6_RECVERR: + case IPV6_RECVHOPLIMIT: + case IPV6_2292HOPLIMIT: + case IPV6_CHECKSUM: val = 0; if (optlen < sizeof(uint32_t)) { return -TARGET_EINVAL; @@ -2827,6 +2912,50 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, ret = get_errno(setsockopt(sockfd, level, optname, &val, sizeof(val))); break; + case IPV6_PKTINFO: + { + struct in6_pktinfo pki; + + if (optlen < sizeof(pki)) { + return -TARGET_EINVAL; + } + + if (copy_from_user(&pki, optval_addr, sizeof(pki))) { + return -TARGET_EFAULT; + } + + pki.ipi6_ifindex = tswap32(pki.ipi6_ifindex); + + ret = get_errno(setsockopt(sockfd, level, optname, + &pki, sizeof(pki))); + break; + } + default: + goto unimplemented; + } + break; + case SOL_ICMPV6: + switch (optname) { + case ICMPV6_FILTER: + { + struct icmp6_filter icmp6f; + + if (optlen > sizeof(icmp6f)) { + optlen = sizeof(icmp6f); + } + + if (copy_from_user(&icmp6f, optval_addr, optlen)) { + return -TARGET_EFAULT; + } + + for (val = 0; val < 8; val++) { + icmp6f.data[val] = tswap32(icmp6f.data[val]); + } + + ret = get_errno(setsockopt(sockfd, level, optname, + &icmp6f, optlen)); + break; + } default: goto unimplemented; } @@ -2834,7 +2963,8 @@ static abi_long do_setsockopt(int sockfd, int level, int optname, case SOL_RAW: switch (optname) { case ICMP_FILTER: - /* struct icmp_filter takes an u32 value */ + case IPV6_CHECKSUM: + /* those take an u32 value */ if (optlen < sizeof(uint32_t)) { return -TARGET_EINVAL; } @@ -7680,7 +7810,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_fork case TARGET_NR_fork: - ret = get_errno(do_fork(cpu_env, SIGCHLD, 0, 0, 0, 0)); + ret = get_errno(do_fork(cpu_env, TARGET_SIGCHLD, 0, 0, 0, 0)); break; #endif #ifdef TARGET_NR_waitpid @@ -10490,7 +10620,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_vfork case TARGET_NR_vfork: - 
ret = get_errno(do_fork(cpu_env, CLONE_VFORK | CLONE_VM | SIGCHLD, + ret = get_errno(do_fork(cpu_env, + CLONE_VFORK | CLONE_VM | TARGET_SIGCHLD, 0, 0, 0, 0)); break; #endif @@ -11063,11 +11194,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, case TARGET_NR_mincore: { void *a; + ret = -TARGET_ENOMEM; + a = lock_user(VERIFY_READ, arg1, arg2, 0); + if (!a) { + goto fail; + } ret = -TARGET_EFAULT; - if (!(a = lock_user(VERIFY_READ, arg1,arg2, 0))) - goto efault; - if (!(p = lock_user_string(arg3))) + p = lock_user_string(arg3); + if (!p) { goto mincore_fail; + } ret = get_errno(mincore(a, arg2, p)); unlock_user(p, arg3, ret); mincore_fail: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 72ca5b11d6..40c5027e93 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -164,6 +164,14 @@ struct target_sockaddr_in { sizeof(struct target_in_addr)]; }; +struct target_sockaddr_in6 { + uint16_t sin6_family; + uint16_t sin6_port; /* big endian */ + uint32_t sin6_flowinfo; /* big endian */ + struct in6_addr sin6_addr; /* IPv6 address, big endian */ + uint32_t sin6_scope_id; +}; + struct target_sock_filter { abi_ushort code; uint8_t jt; @@ -917,6 +917,8 @@ void memory_region_transaction_commit(void) AddressSpace *as; assert(memory_region_transaction_depth); + assert(qemu_mutex_iothread_locked()); + --memory_region_transaction_depth; if (!memory_region_transaction_depth) { if (memory_region_update_pending) { diff --git a/migration/block.c b/migration/block.c index ebc10e628d..1941bc2402 100644 --- a/migration/block.c +++ b/migration/block.c @@ -379,7 +379,7 @@ static void unset_dirty_tracking(void) } } -static void init_blk_migration(QEMUFile *f) +static int init_blk_migration(QEMUFile *f) { BlockDriverState *bs; BlkMigDevState *bmds; @@ -390,6 +390,8 @@ static void init_blk_migration(QEMUFile *f) BlkMigDevState *bmds; BlockDriverState *bs; } *bmds_bs; + Error *local_err = NULL; + int ret; block_mig_state.submitted = 0; block_mig_state.read_done = 0; @@ -411,11 +413,12 @@ static void init_blk_migration(QEMUFile *f) sectors = bdrv_nb_sectors(bs); if (sectors <= 0) { + ret = sectors; goto out; } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(); + bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; @@ -445,7 +448,11 @@ static void init_blk_migration(QEMUFile *f) BlockDriverState *bs = bmds_bs[i].bs; if (bmds) { - blk_insert_bs(bmds->blk, bs); + ret = blk_insert_bs(bmds->blk, bs, &local_err); + if (ret < 0) { + error_report_err(local_err); + goto out; + } alloc_aio_bitmap(bmds); error_setg(&bmds->blocker, "block device is in use by migration"); @@ -453,8 +460,10 @@ static void init_blk_migration(QEMUFile *f) } } + ret = 0; out: g_free(bmds_bs); + return ret; } /* Called with no lock taken. 
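
init_blk_migration() now reports failures to its caller instead of pressing on, using the usual single-exit C idiom: compute a negative error code, jump to a cleanup label, and let block_save_setup() decide what to do with it. A generic, QEMU-independent sketch of that shape follows; the function, path and buffer are invented for the example.

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Invented example: acquire two resources, release both on any failure. */
    static int setup_device(const char *path, void **out)
    {
        int ret;
        FILE *f = NULL;
        void *buf = malloc(4096);

        if (!buf) {
            ret = -ENOMEM;
            goto out;
        }
        f = fopen(path, "rb");
        if (!f) {
            ret = -errno;          /* hand the precise cause to the caller */
            goto out;
        }
        *out = buf;
        buf = NULL;                /* ownership transferred to the caller */
        ret = 0;
    out:
        if (f) {
            fclose(f);
        }
        free(buf);                 /* NULL on success, so this is a no-op */
        return ret;
    }

    int main(void)
    {
        void *dev = NULL;
        int ret = setup_device("/etc/hostname", &dev);

        if (ret < 0) {
            fprintf(stderr, "setup failed: %d\n", ret);
            return 1;
        }
        free(dev);
        return 0;
    }
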
*/ @@ -705,7 +714,11 @@ static int block_save_setup(QEMUFile *f, void *opaque) block_mig_state.submitted, block_mig_state.transferred); qemu_mutex_lock_iothread(); - init_blk_migration(f); + ret = init_blk_migration(f); + if (ret < 0) { + qemu_mutex_unlock_iothread(); + return ret; + } /* start track dirty blocks */ ret = set_dirty_tracking(); diff --git a/migration/colo.c b/migration/colo.c index 712308ed5e..c19eb3f073 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -19,6 +19,8 @@ #include "qemu/error-report.h" #include "qapi/error.h" #include "migration/failover.h" +#include "replication.h" +#include "qmp-commands.h" static bool vmstate_loading; @@ -147,6 +149,53 @@ void colo_do_failover(MigrationState *s) } } +void qmp_xen_set_replication(bool enable, bool primary, + bool has_failover, bool failover, + Error **errp) +{ + ReplicationMode mode = primary ? + REPLICATION_MODE_PRIMARY : + REPLICATION_MODE_SECONDARY; + + if (has_failover && enable) { + error_setg(errp, "Parameter 'failover' is only for" + " stopping replication"); + return; + } + + if (enable) { + replication_start_all(mode, errp); + } else { + if (!has_failover) { + failover = NULL; + } + replication_stop_all(failover, failover ? NULL : errp); + } +} + +ReplicationStatus *qmp_query_xen_replication_status(Error **errp) +{ + Error *err = NULL; + ReplicationStatus *s = g_new0(ReplicationStatus, 1); + + replication_get_error_all(&err); + if (err) { + s->error = true; + s->has_desc = true; + s->desc = g_strdup(error_get_pretty(err)); + } else { + s->error = false; + } + + error_free(err); + return s; +} + +void qmp_xen_colo_do_checkpoint(Error **errp) +{ + replication_do_checkpoint_all(errp); +} + static void colo_send_message(QEMUFile *f, COLOMessage msg, Error **errp) { diff --git a/migration/migration.c b/migration/migration.c index c6ae69d371..3dab6845b1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -49,6 +49,10 @@ * for sending the last part */ #define DEFAULT_MIGRATE_SET_DOWNTIME 300 +/* Maximum migrate downtime set to 2000 seconds */ +#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 +#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + /* Default compression thread count */ #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 /* Default decompression thread count, usually decompression is at @@ -383,6 +387,7 @@ static void process_incoming_migration_co(void *opaque) int ret; mis->from_src_file = f; + mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); migrate_set_state(&mis->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_ACTIVE); @@ -843,10 +848,11 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) return; } if (params->has_downtime_limit && - (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - "downtime_limit", - "an integer in the range of 0 to 2000000 milliseconds"); + (params->downtime_limit < 0 || + params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d milliseconds", + MAX_MIGRATE_DOWNTIME); return; } if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { @@ -1145,6 +1151,21 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } +int check_migratable(Object *obj, Error **err) +{ + DeviceClass *dc = DEVICE_GET_CLASS(obj); + if (only_migratable && dc->vmsd) { + if (dc->vmsd->unmigratable) 
{ + error_setg(err, "Device %s is not migratable, but " + "--only-migratable was specified", + object_get_typename(obj)); + return -1; + } + } + + return 0; +} + void qmp_migrate_incoming(const char *uri, Error **errp) { Error *local_err = NULL; @@ -1289,6 +1310,13 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) void qmp_migrate_set_downtime(double value, Error **errp) { + if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) { + error_setg(errp, "Parameter 'downtime_limit' expects an integer in " + "the range of 0 to %d seconds", + MAX_MIGRATE_DOWNTIME_SECONDS); + return; + } + value *= 1000; /* Convert to milliseconds */ value = MAX(0, MIN(INT64_MAX, value)); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index a40dddbaf6..effbeb64fb 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -81,25 +81,18 @@ static bool ufd_version_check(int ufd) return false; } - return true; -} - -/* - * Check for things that postcopy won't support; returns 0 if the block - * is fine. - */ -static int check_range(const char *block_name, void *host_addr, - ram_addr_t offset, ram_addr_t length, void *opaque) -{ - RAMBlock *rb = qemu_ram_block_by_name(block_name); - - if (qemu_ram_pagesize(rb) > getpagesize()) { - error_report("Postcopy doesn't support large page sizes yet (%s)", - block_name); - return -E2BIG; + if (getpagesize() != ram_pagesize_summary()) { + bool have_hp = false; + /* We've got a huge page */ +#ifdef UFFD_FEATURE_MISSING_HUGETLBFS + have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS; +#endif + if (!have_hp) { + error_report("Userfault on this host does not support huge pages"); + return false; + } } - - return 0; + return true; } /* @@ -122,12 +115,6 @@ bool postcopy_ram_supported_by_host(void) goto out; } - /* Check for anything about the RAMBlocks we don't support */ - if (qemu_ram_foreach_block(check_range, NULL)) { - /* check_range will have printed its own error */ - goto out; - } - ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, @@ -200,27 +187,6 @@ out: return ret; } -/** - * postcopy_ram_discard_range: Discard a range of memory. - * We can assume that if we've been called postcopy_ram_hosttest returned true. - * - * @mis: Current incoming migration state. - * @start, @length: range of memory to discard. - * - * returns: 0 on success. - */ -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - trace_postcopy_ram_discard_range(start, length); - if (madvise(start, length, MADV_DONTNEED)) { - error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno)); - return -1; - } - - return 0; -} - /* * Setup an area of RAM so that it *can* be used for postcopy later; this * must be done right at the start prior to pre-copy. @@ -239,7 +205,7 @@ static int init_range(const char *block_name, void *host_addr, * - we're going to get the copy from the source anyway. 
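
The postcopy_ram_discard_range() removed above was a thin wrapper around madvise(MADV_DONTNEED): it drops the page contents so the range can later be refilled (or, once registered with userfaultfd, trapped on access). In a plain private anonymous mapping the next touch simply reads zero-filled memory again, as this small stand-alone demonstration shows; the sizes and values are arbitrary.

    #define _DEFAULT_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        size_t len = 2 * 1024 * 1024;
        unsigned char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        memset(p, 0xaa, len);
        /* Drop the contents; the backing pages go back to the kernel. */
        if (madvise(p, len, MADV_DONTNEED) != 0) {
            perror("madvise");
            return 1;
        }
        printf("first byte after discard: %d\n", p[0]);   /* prints 0 */
        munmap(p, len);
        return 0;
    }
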
* (Precopy will just overwrite this data, so doesn't need the discard) */ - if (postcopy_ram_discard_range(mis, host_addr, length)) { + if (ram_discard_range(mis, block_name, 0, length)) { return -1; } @@ -342,9 +308,13 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0); if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, getpagesize()); + munmap(mis->postcopy_tmp_page, mis->largest_page_size); mis->postcopy_tmp_page = NULL; } + if (mis->postcopy_tmp_zero_page) { + munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); + mis->postcopy_tmp_zero_page = NULL; + } trace_postcopy_ram_incoming_cleanup_exit(); return 0; } @@ -408,6 +378,10 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr, error_report("%s userfault register: %s", __func__, strerror(errno)); return -1; } + if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) { + error_report("%s userfault: Region doesn't support COPY", __func__); + return -1; + } return 0; } @@ -420,7 +394,6 @@ static void *postcopy_ram_fault_thread(void *opaque) MigrationIncomingState *mis = opaque; struct uffd_msg msg; int ret; - size_t hostpagesize = getpagesize(); RAMBlock *rb = NULL; RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */ @@ -487,7 +460,7 @@ static void *postcopy_ram_fault_thread(void *opaque) break; } - rb_offset &= ~(hostpagesize - 1); + rb_offset &= ~(qemu_ram_pagesize(rb) - 1); trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address, qemu_ram_get_idstr(rb), rb_offset); @@ -499,11 +472,11 @@ static void *postcopy_ram_fault_thread(void *opaque) if (rb != last_rb) { last_rb = rb; migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } else { /* Save some space */ migrate_send_rp_req_pages(mis, NULL, - rb_offset, hostpagesize); + rb_offset, qemu_ram_pagesize(rb)); } } trace_postcopy_ram_fault_thread_exit(); @@ -564,13 +537,14 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) * Place a host page (from) at (host) atomically * returns 0 on success */ -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { struct uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host; copy_struct.src = (uint64_t)(uintptr_t)from; - copy_struct.len = getpagesize(); + copy_struct.len = pagesize; copy_struct.mode = 0; /* copy also acks to the kernel waking the stalled thread up @@ -580,8 +554,8 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) */ if (ioctl(mis->userfault_fd, UFFDIO_COPY, ©_struct)) { int e = errno; - error_report("%s: %s copy host: %p from: %p", - __func__, strerror(e), host, from); + error_report("%s: %s copy host: %p from: %p (size: %zd)", + __func__, strerror(e), host, from, pagesize); return -e; } @@ -594,23 +568,44 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) * Place a zero page at (host) atomically * returns 0 on success */ -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { - struct uffdio_zeropage zero_struct; + trace_postcopy_place_page_zero(host); - zero_struct.range.start = (uint64_t)(uintptr_t)host; - zero_struct.range.len = getpagesize(); - zero_struct.mode = 0; + if (pagesize == getpagesize()) { + struct 
uffdio_zeropage zero_struct; + zero_struct.range.start = (uint64_t)(uintptr_t)host; + zero_struct.range.len = getpagesize(); + zero_struct.mode = 0; - if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { - int e = errno; - error_report("%s: %s zero host: %p", - __func__, strerror(e), host); + if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) { + int e = errno; + error_report("%s: %s zero host: %p", + __func__, strerror(e), host); - return -e; + return -e; + } + } else { + /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */ + if (!mis->postcopy_tmp_zero_page) { + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + int e = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: %s mapping large zero page", + __func__, strerror(e)); + return -e; + } + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + } + return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page, + pagesize); } - trace_postcopy_place_page_zero(host); return 0; } @@ -625,7 +620,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) void *postcopy_get_tmp_page(MigrationIncomingState *mis) { if (!mis->postcopy_tmp_page) { - mis->postcopy_tmp_page = mmap(NULL, getpagesize(), + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mis->postcopy_tmp_page == MAP_FAILED) { @@ -658,13 +653,6 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) return -1; } -int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, - size_t length) -{ - assert(0); - return -1; -} - int postcopy_ram_prepare_discard(MigrationIncomingState *mis) { assert(0); @@ -677,13 +665,15 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis) return -1; } -int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from) +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, + size_t pagesize) { assert(0); return -1; } -int postcopy_place_page_zero(MigrationIncomingState *mis, void *host) +int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, + size_t pagesize) { assert(0); return -1; diff --git a/migration/ram.c b/migration/ram.c index f289fcddd5..719425b9b8 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -600,6 +600,23 @@ static void migration_bitmap_sync_init(void) iterations_prev = 0; } +/* Returns a summary bitmap of the page sizes of all RAMBlocks; + * for VMs with just normal pages this is equivalent to the + * host page size. If it's got some huge pages then it's the OR + * of all the different page sizes. + */ +uint64_t ram_pagesize_summary(void) +{ + RAMBlock *block; + uint64_t summary = 0; + + QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { + summary |= block->page_size; + } + + return summary; +} + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -1285,6 +1302,8 @@ static int ram_save_target_page(MigrationState *ms, QEMUFile *f, * offset to point into the middle of a host page * in which case the remainder of the hostpage is sent. * Only dirty target pages are sent. + * Note that the host page size may be a huge page for this + * block. * * Returns: Number of pages written. 
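
postcopy_place_page() above boils down to one UFFDIO_COPY ioctl, which atomically installs a page of the RAMBlock's page size and wakes any thread stalled on it. Below is a stripped-down helper in the same spirit; it is a sketch rather than QEMU's exact code, it is not a complete program, and it assumes the caller already opened the userfaultfd and registered the destination range in missing mode.

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/userfaultfd.h>

    /* Hypothetical helper: copy one page-sized buffer into a faulting range. */
    static int place_page(int ufd, void *host, void *from, size_t pagesize)
    {
        struct uffdio_copy copy;

        memset(&copy, 0, sizeof(copy));
        copy.dst = (uintptr_t)host;
        copy.src = (uintptr_t)from;
        copy.len = pagesize;          /* may be a huge page size */
        copy.mode = 0;                /* mode 0 also wakes waiters on completion */

        if (ioctl(ufd, UFFDIO_COPY, &copy) != 0) {
            return -1;                /* errno holds the cause, e.g. EEXIST */
        }
        return 0;
    }
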
* @@ -1303,6 +1322,8 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, ram_addr_t dirty_ram_abs) { int tmppages, pages = 0; + size_t pagesize = qemu_ram_pagesize(pss->block); + do { tmppages = ram_save_target_page(ms, f, pss, last_stage, bytes_transferred, dirty_ram_abs); @@ -1313,7 +1334,7 @@ static int ram_save_host_page(MigrationState *ms, QEMUFile *f, pages += tmppages; pss->offset += TARGET_PAGE_SIZE; dirty_ram_abs += TARGET_PAGE_SIZE; - } while (pss->offset & (qemu_host_page_size - 1)); + } while (pss->offset & (pagesize - 1)); /* The offset we leave with is the last one we looked at */ pss->offset -= TARGET_PAGE_SIZE; @@ -1655,12 +1676,17 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, { unsigned long *bitmap; unsigned long *unsentmap; - unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE; + unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE; unsigned long first = block->offset >> TARGET_PAGE_BITS; unsigned long len = block->used_length >> TARGET_PAGE_BITS; unsigned long last = first + (len - 1); unsigned long run_start; + if (block->page_size == TARGET_PAGE_SIZE) { + /* Easy case - TPS==HPS for a non-huge page RAMBlock */ + return; + } + bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap; unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap; @@ -1764,7 +1790,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass, * Utility for the outgoing postcopy code. * * Discard any partially sent host-page size chunks, mark any partially - * dirty host-page size chunks as all dirty. + * dirty host-page size chunks as all dirty. In this case the host-page + * is the host-page for the particular RAMBlock, i.e. it might be a huge page * * Returns: 0 on success */ @@ -1772,11 +1799,6 @@ static int postcopy_chunk_hostpages(MigrationState *ms) { struct RAMBlock *block; - if (qemu_host_page_size == TARGET_PAGE_SIZE) { - /* Easy case - TPS==HPS - nothing to be done */ - return 0; - } - /* Easiest way to make sure we don't resume in the middle of a host-page */ last_seen_block = NULL; last_sent_block = NULL; @@ -1832,7 +1854,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) return -EINVAL; } - /* Deal with TPS != HPS */ + /* Deal with TPS != HPS and huge pages */ ret = postcopy_chunk_hostpages(ms); if (ret) { rcu_read_unlock(); @@ -1872,6 +1894,8 @@ int ram_discard_range(MigrationIncomingState *mis, { int ret = -1; + trace_ram_discard_range(block_name, start, length); + rcu_read_lock(); RAMBlock *rb = qemu_ram_block_by_name(block_name); @@ -1881,27 +1905,7 @@ int ram_discard_range(MigrationIncomingState *mis, goto err; } - uint8_t *host_startaddr = rb->host + start; - - if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned start address: %p", - host_startaddr); - goto err; - } - - if ((start + length) <= rb->used_length) { - uint8_t *host_endaddr = host_startaddr + length; - if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) { - error_report("ram_discard_range: Unaligned end address: %p", - host_endaddr); - goto err; - } - ret = postcopy_ram_discard_range(mis, host_startaddr, length); - } else { - error_report("ram_discard_range: Overrun block '%s' (%" PRIu64 - "/%zx/" RAM_ADDR_FMT")", - block_name, start, length, rb->used_length); - } + ret = ram_block_discard_range(rb, start, length); err: rcu_read_unlock(); @@ -2010,6 +2014,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_byte(f, 
strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); qemu_put_be64(f, block->used_length); + if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) { + qemu_put_be64(f, block->page_size); + } } rcu_read_unlock(); @@ -2387,7 +2394,7 @@ static int ram_load_postcopy(QEMUFile *f) { int flags = 0, ret = 0; bool place_needed = false; - bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE; + bool matching_page_sizes = false; MigrationIncomingState *mis = migration_incoming_get_current(); /* Temporary page that is later 'placed' */ void *postcopy_host_page = postcopy_get_tmp_page(mis); @@ -2399,6 +2406,7 @@ static int ram_load_postcopy(QEMUFile *f) void *host = NULL; void *page_buffer = NULL; void *place_source = NULL; + RAMBlock *block = NULL; uint8_t ch; addr = qemu_get_be64(f); @@ -2408,7 +2416,7 @@ static int ram_load_postcopy(QEMUFile *f) trace_ram_load_postcopy_loop((uint64_t)addr, flags); place_needed = false; if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) { - RAMBlock *block = ram_block_from_stream(f, flags); + block = ram_block_from_stream(f, flags); host = host_from_ram_block_offset(block, addr); if (!host) { @@ -2416,8 +2424,11 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } + matching_page_sizes = block->page_size == TARGET_PAGE_SIZE; /* - * Postcopy requires that we place whole host pages atomically. + * Postcopy requires that we place whole host pages atomically; + * these may be huge pages for RAMBlocks that are backed by + * hugetlbfs. * To make it atomic, the data is read into a temporary page * that's moved into place later. * The migration protocol uses, possibly smaller, target-pages @@ -2425,9 +2436,9 @@ static int ram_load_postcopy(QEMUFile *f) * of a host page in order. 
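
ram_load_postcopy() above stages incoming TARGET_PAGE_SIZE chunks in a temporary buffer and only places the result once the chunk completing the host page (which may be a huge page) has arrived; place_dest then points back at the start of that host page. A toy, QEMU-independent model of that bookkeeping, with invented helper names and sizes:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define TARGET_PAGE_SIZE 4096
    #define HOST_PAGE_SIZE   (4 * TARGET_PAGE_SIZE)   /* e.g. a small huge page */

    static unsigned char staging[HOST_PAGE_SIZE];

    /* Returns true when the staged host page is complete and can be placed. */
    static bool stage_target_page(unsigned long host_addr, const void *data)
    {
        unsigned long offset_in_hp = host_addr & (HOST_PAGE_SIZE - 1);

        memcpy(staging + offset_in_hp, data, TARGET_PAGE_SIZE);
        return ((host_addr + TARGET_PAGE_SIZE) & (HOST_PAGE_SIZE - 1)) == 0;
    }

    int main(void)
    {
        unsigned char chunk[TARGET_PAGE_SIZE] = { 0x42 };

        for (unsigned long addr = 0x200000; addr < 0x200000 + HOST_PAGE_SIZE;
             addr += TARGET_PAGE_SIZE) {
            if (stage_target_page(addr, chunk)) {
                /* Mirrors place_dest = host + TARGET_PAGE_SIZE - page_size. */
                printf("host page at %#lx complete, placing it\n",
                       addr + TARGET_PAGE_SIZE - HOST_PAGE_SIZE);
            }
        }
        return 0;
    }
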
*/ page_buffer = postcopy_host_page + - ((uintptr_t)host & ~qemu_host_page_mask); + ((uintptr_t)host & (block->page_size - 1)); /* If all TP are zero then we can optimise the place */ - if (!((uintptr_t)host & ~qemu_host_page_mask)) { + if (!((uintptr_t)host & (block->page_size - 1))) { all_zero = true; } else { /* not the 1st TP within the HP */ @@ -2445,7 +2456,7 @@ static int ram_load_postcopy(QEMUFile *f) * page */ place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) & - ~qemu_host_page_mask) == 0; + (block->page_size - 1)) == 0; place_source = postcopy_host_page; } last_host = host; @@ -2483,14 +2494,14 @@ static int ram_load_postcopy(QEMUFile *f) if (place_needed) { /* This gets called at the last target page in the host page */ + void *place_dest = host + TARGET_PAGE_SIZE - block->page_size; + if (all_zero) { - ret = postcopy_place_page_zero(mis, - host + TARGET_PAGE_SIZE - - qemu_host_page_size); + ret = postcopy_place_page_zero(mis, place_dest, + block->page_size); } else { - ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE - - qemu_host_page_size, - place_source); + ret = postcopy_place_page(mis, place_dest, + place_source, block->page_size); } } if (!ret) { @@ -2511,6 +2522,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) * be atomic */ bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE; seq_iter++; @@ -2575,6 +2588,18 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) error_report_err(local_err); } } + /* For postcopy we need to check hugepage sizes match */ + if (postcopy_advised && + block->page_size != qemu_host_page_size) { + uint64_t remote_page_size = qemu_get_be64(f); + if (remote_page_size != block->page_size) { + error_report("Mismatched RAM page size %s " + "(local) %zd != %" PRId64, + id, block->page_size, + remote_page_size); + ret = -EINVAL; + } + } ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, block->idstr); } else { diff --git a/migration/savevm.c b/migration/savevm.c index 5ecd264134..3b19a4a274 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -688,6 +688,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, return -1; } + g_free(id); se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); @@ -869,7 +870,7 @@ int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len) void qemu_savevm_send_postcopy_advise(QEMUFile *f) { uint64_t tmp[2]; - tmp[0] = cpu_to_be64(getpagesize()); + tmp[0] = cpu_to_be64(ram_pagesize_summary()); tmp[1] = cpu_to_be64(1ul << qemu_target_page_bits()); trace_qemu_savevm_send_postcopy_advise(); @@ -1276,6 +1277,11 @@ done: status = MIGRATION_STATUS_COMPLETED; } migrate_set_state(&ms->state, MIGRATION_STATUS_SETUP, status); + + /* f is outer parameter, it should not stay in global migration state after + * this function finished */ + ms->to_dst_file = NULL; + return ret; } @@ -1346,7 +1352,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) { PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); - uint64_t remote_hps, remote_tps; + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; trace_loadvm_postcopy_handle_advise(); if (ps != POSTCOPY_INCOMING_NONE) { @@ -1359,17 +1365,27 @@ static int 
loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } - remote_hps = qemu_get_be64(mis->from_src_file); - if (remote_hps != getpagesize()) { + remote_pagesize_summary = qemu_get_be64(mis->from_src_file); + local_pagesize_summary = ram_pagesize_summary(); + + if (remote_pagesize_summary != local_pagesize_summary) { /* - * Some combinations of mismatch are probably possible but it gets - * a bit more complicated. In particular we need to place whole - * host pages on the dest at once, and we need to ensure that we - * handle dirtying to make sure we never end up sending part of - * a hostpage on it's own. + * This detects two potential causes of mismatch: + * a) A mismatch in host page sizes + * Some combinations of mismatch are probably possible but it gets + * a bit more complicated. In particular we need to place whole + * host pages on the dest at once, and we need to ensure that we + * handle dirtying to make sure we never end up sending part of + * a hostpage on it's own. + * b) The use of different huge page sizes on source/destination + * a more fine grain test is performed during RAM block migration + * but this test here causes a nice early clear failure, and + * also fails when passed to an older qemu that doesn't + * do huge pages. */ - error_report("Postcopy needs matching host page sizes (s=%d d=%d)", - (int)remote_hps, getpagesize()); + error_report("Postcopy needs matching RAM page sizes (s=%" PRIx64 + " d=%" PRIx64 ")", + remote_pagesize_summary, local_pagesize_summary); return -1; } diff --git a/migration/trace-events b/migration/trace-events index fa660e35b1..7372ce2a51 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -68,6 +68,7 @@ get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, uint64_t migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_throttle(void) "" +ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" ram_postcopy_send_discard_bitmap(void) "" ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx" @@ -176,7 +177,6 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" -postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" diff --git a/migration/vmstate.c b/migration/vmstate.c index b4d8ae982a..78b3cd48e7 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -52,29 +52,15 @@ static int vmstate_size(void *opaque, VMStateField *field) return size; } -static void *vmstate_base_addr(void *opaque, VMStateField *field, bool alloc) +static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) { - void *base_addr = opaque + field->offset; - - if (field->flags & VMS_POINTER) { - if (alloc && (field->flags & VMS_ALLOC)) { - gsize size = 0; - if (field->flags & 
VMS_VBUFFER) { - size = vmstate_size(opaque, field); - } else { - int n_elems = vmstate_n_elems(opaque, field); - if (n_elems) { - size = n_elems * field->size; - } - } - if (size) { - *(void **)base_addr = g_malloc(size); - } + if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { + gsize size = vmstate_size(opaque, field); + size *= vmstate_n_elems(opaque, field); + if (size) { + *(void **)ptr = g_malloc(size); } - base_addr = *(void **)base_addr; } - - return base_addr; } int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, @@ -116,21 +102,30 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, true); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); + vmstate_handle_alloc(first_elem, field, opaque); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - ret = vmstate_load_state(f, field->vmsd, addr, + if (!curr_elem) { + /* if null pointer check placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + ret = vmstate_info_nullptr.get(f, curr_elem, size, NULL); + } else if (field->flags & VMS_STRUCT) { + ret = vmstate_load_state(f, field->vmsd, curr_elem, field->vmsd->version_id); } else { - ret = field->info->get(f, addr, size, field); + ret = field->info->get(f, curr_elem, size, field); } if (ret >= 0) { ret = qemu_file_get_error(f); @@ -321,26 +316,34 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = vmstate_base_addr(opaque, field, false); + void *first_elem = opaque + field->offset; int i, n_elems = vmstate_n_elems(opaque, field); int size = vmstate_size(opaque, field); int64_t old_offset, written_bytes; QJSON *vmdesc_loop = vmdesc; trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems); + if (field->flags & VMS_POINTER) { + first_elem = *(void **)first_elem; + assert(first_elem || !n_elems); + } for (i = 0; i < n_elems; i++) { - void *addr = base_addr + size * i; + void *curr_elem = first_elem + size * i; vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); old_offset = qemu_ftell_fast(f); - if (field->flags & VMS_ARRAY_OF_POINTER) { - addr = *(void **)addr; + assert(curr_elem); + curr_elem = *(void **)curr_elem; } - if (field->flags & VMS_STRUCT) { - vmstate_save_state(f, field->vmsd, addr, vmdesc_loop); + if (!curr_elem) { + /* if null pointer write placeholder and do not follow */ + assert(field->flags & VMS_ARRAY_OF_POINTER); + vmstate_info_nullptr.put(f, curr_elem, size, NULL, NULL); + } else if (field->flags & VMS_STRUCT) { + vmstate_save_state(f, field->vmsd, curr_elem, vmdesc_loop); } else { - field->info->put(f, addr, size, field, vmdesc_loop); + field->info->put(f, curr_elem, size, field, vmdesc_loop); } written_bytes = qemu_ftell_fast(f) - old_offset; @@ -752,6 +755,34 @@ const VMStateInfo vmstate_info_uint64 = { .put = put_uint64, }; +static int get_nullptr(QEMUFile *f, void *pv, size_t size, VMStateField 
*field) + +{ + if (qemu_get_byte(f) == VMS_NULLPTR_MARKER) { + return 0; + } + error_report("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); + return -EINVAL; +} + +static int put_nullptr(QEMUFile *f, void *pv, size_t size, + VMStateField *field, QJSON *vmdesc) + +{ + if (pv == NULL) { + qemu_put_byte(f, VMS_NULLPTR_MARKER); + return 0; + } + error_report("vmstate: put_nullptr must be called with pv == NULL"); + return -EINVAL; +} + +const VMStateInfo vmstate_info_nullptr = { + .name = "uint64", + .get = get_nullptr, + .put = put_nullptr, +}; + /* 64 bit unsigned int. See that the received value is the same than the one in the field */ @@ -984,8 +984,10 @@ static void qmp_unregister_commands_hack(void) #ifndef TARGET_ARM qmp_unregister_command("query-gic-capabilities"); #endif -#if !defined(TARGET_S390X) +#if !defined(TARGET_S390X) && !defined(TARGET_I386) qmp_unregister_command("query-cpu-model-expansion"); +#endif +#if !defined(TARGET_S390X) qmp_unregister_command("query-cpu-model-baseline"); qmp_unregister_command("query-cpu-model-comparison"); #endif @@ -1026,6 +1028,9 @@ int monitor_set_cpu(int cpu_index) CPUState *mon_get_cpu(void) { if (!cur_mon->mon_cpu) { + if (!first_cpu) { + return NULL; + } monitor_set_cpu(first_cpu->cpu_index); } cpu_synchronize_state(cur_mon->mon_cpu); @@ -1034,17 +1039,27 @@ CPUState *mon_get_cpu(void) CPUArchState *mon_get_cpu_env(void) { - return mon_get_cpu()->env_ptr; + CPUState *cs = mon_get_cpu(); + + return cs ? cs->env_ptr : NULL; } int monitor_get_cpu_index(void) { - return mon_get_cpu()->cpu_index; + CPUState *cs = mon_get_cpu(); + + return cs ? cs->cpu_index : UNASSIGNED_CPU_INDEX; } static void hmp_info_registers(Monitor *mon, const QDict *qdict) { - cpu_dump_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); + CPUState *cs = mon_get_cpu(); + + if (!cs) { + monitor_printf(mon, "No CPU available\n"); + return; + } + cpu_dump_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); } static void hmp_info_jit(Monitor *mon, const QDict *qdict) @@ -1077,7 +1092,13 @@ static void hmp_info_history(Monitor *mon, const QDict *qdict) static void hmp_info_cpustats(Monitor *mon, const QDict *qdict) { - cpu_dump_statistics(mon_get_cpu(), (FILE *)mon, &monitor_fprintf, 0); + CPUState *cs = mon_get_cpu(); + + if (!cs) { + monitor_printf(mon, "No CPU available\n"); + return; + } + cpu_dump_statistics(cs, (FILE *)mon, &monitor_fprintf, 0); } static void hmp_info_trace_events(Monitor *mon, const QDict *qdict) @@ -1236,6 +1257,12 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, int l, line_size, i, max_digits, len; uint8_t buf[16]; uint64_t v; + CPUState *cs = mon_get_cpu(); + + if (!cs && (format == 'i' || !is_physical)) { + monitor_printf(mon, "Can not dump without CPU\n"); + return; + } if (format == 'i') { int flags = 0; @@ -1265,7 +1292,7 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, flags = msr_le << 16; flags |= env->bfd_mach; #endif - monitor_disas(mon, mon_get_cpu(), addr, count, is_physical, flags); + monitor_disas(mon, cs, addr, count, is_physical, flags); return; } @@ -1304,7 +1331,7 @@ static void memory_dump(Monitor *mon, int count, int format, int wsize, if (is_physical) { cpu_physical_memory_read(addr, buf, l); } else { - if (cpu_memory_rw_debug(mon_get_cpu(), addr, buf, l, 0) < 0) { + if (cpu_memory_rw_debug(cs, addr, buf, l, 0) < 0) { monitor_printf(mon, " Cannot access memory\n"); break; } @@ -2189,11 +2216,12 @@ expr_error(Monitor *mon, const char *fmt, ...) 
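
The vmstate_info_nullptr handlers above let an array of pointers contain NULL entries: instead of following the pointer, the saver emits a single marker byte and the loader verifies it. Below is a toy version of that idea outside QEMU; the marker value, helper names and the was_null flag are inventions of the sketch (in the real code the loader knows the slot is NULL from the destination structure, not from the stream).

    #include <stdint.h>
    #include <stdio.h>

    #define NULLPTR_MARKER 0x30            /* invented stand-in value */

    static void put_elem(FILE *f, const uint32_t *elem)
    {
        if (!elem) {
            fputc(NULLPTR_MARKER, f);      /* placeholder, do not follow */
            return;
        }
        fwrite(elem, sizeof(*elem), 1, f);
    }

    static int get_elem(FILE *f, uint32_t *elem, int was_null)
    {
        if (was_null) {
            return fgetc(f) == NULLPTR_MARKER ? 0 : -1;
        }
        return fread(elem, sizeof(*elem), 1, f) == 1 ? 0 : -1;
    }

    int main(void)
    {
        uint32_t value = 7, loaded;
        const uint32_t *array[2] = { &value, NULL };
        FILE *f = tmpfile();

        if (!f) {
            return 1;
        }
        for (int i = 0; i < 2; i++) {
            put_elem(f, array[i]);
        }
        rewind(f);
        printf("elem0: %s\n", get_elem(f, &loaded, 0) == 0 ? "ok" : "bad");
        printf("elem1 (NULL): %s\n", get_elem(f, NULL, 1) == 0 ? "ok" : "bad");
        fclose(f);
        return 0;
    }
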
static int get_monitor_def(target_long *pval, const char *name) { const MonitorDef *md = target_monitor_defs(); + CPUState *cs = mon_get_cpu(); void *ptr; uint64_t tmp = 0; int ret; - if (md == NULL) { + if (cs == NULL || md == NULL) { return -1; } @@ -2220,7 +2248,7 @@ static int get_monitor_def(target_long *pval, const char *name) } } - ret = target_get_monitor_def(mon_get_cpu(), name, &tmp); + ret = target_get_monitor_def(cs, name, &tmp); if (!ret) { *pval = (target_long) tmp; } @@ -2773,7 +2801,8 @@ static QDict *monitor_parse_arguments(Monitor *mon, break; case 'o': { - int64_t val; + int ret; + uint64_t val; char *end; while (qemu_isspace(*p)) { @@ -2785,8 +2814,8 @@ static QDict *monitor_parse_arguments(Monitor *mon, break; } } - val = qemu_strtosz(p, &end); - if (val < 0) { + ret = qemu_strtosz_MiB(p, &end, &val); + if (ret < 0 || val > INT64_MAX) { monitor_printf(mon, "invalid size\n"); goto fail; } @@ -3686,12 +3715,12 @@ static QDict *qmp_check_input_obj(QObject *input_obj, Error **errp) int has_exec_key = 0; QDict *input_dict; - if (qobject_type(input_obj) != QTYPE_QDICT) { + input_dict = qobject_to_qdict(input_obj); + if (!input_dict) { error_setg(errp, QERR_QMP_BAD_INPUT_OBJECT, "object"); return NULL; } - input_dict = qobject_to_qdict(input_obj); for (ent = qdict_first(input_dict); ent; ent = qdict_next(input_dict, ent)){ const char *arg_name = qdict_entry_key(ent); @@ -3735,10 +3764,11 @@ static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens) Error *err = NULL; req = json_parser_parse_err(tokens, NULL, &err); - if (err || !req || qobject_type(req) != QTYPE_QDICT) { - if (!err) { - error_setg(&err, QERR_JSON_PARSING); - } + if (!req && !err) { + /* json_parser_parse_err() sucks: can fail without setting @err */ + error_setg(&err, QERR_JSON_PARSING); + } + if (err) { goto err_out; } @@ -4155,10 +4185,10 @@ HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp) MachineState *ms = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(ms); - if (!mc->query_hotpluggable_cpus) { + if (!mc->has_hotpluggable_cpus) { error_setg(errp, QERR_FEATURE_DISABLED, "query-hotpluggable-cpus"); return NULL; } - return mc->query_hotpluggable_cpus(ms); + return machine_query_hotpluggable_cpus(ms); } diff --git a/nbd/server.c b/nbd/server.c index ac92fa0727..924a1fe2db 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -891,9 +891,21 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, { BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + uint64_t perm; + int ret; - blk = blk_new(); - blk_insert_bs(blk, bs); + /* Don't allow resize while the NBD server is running, otherwise we don't + * care what happens with the node. 
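
The monitor's 'o' argument handling above switches to an errno-style size parser that treats a bare number as mebibytes and signals failure through its return value rather than a negative size. A rough stand-alone approximation of that behaviour follows; this is not qemu_strtosz_MiB(), the helper name is invented and suffix handling is simplified.

    #include <ctype.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Invented example parser: "512" -> 512 MiB, "2G" -> 2 GiB, "64k" -> 64 KiB. */
    static int parse_size_mib(const char *s, uint64_t *out)
    {
        char *end;
        unsigned long long v;

        errno = 0;
        v = strtoull(s, &end, 0);
        if (errno != 0 || end == s) {
            return -EINVAL;
        }
        switch (tolower((unsigned char)*end)) {
        case 'k': v <<= 10; end++; break;
        case 'g': v <<= 30; end++; break;
        case '\0':
        case 'm': v <<= 20; if (*end) end++; break;   /* MiB is the default */
        default:  return -EINVAL;
        }
        if (*end != '\0') {
            return -EINVAL;
        }
        *out = v;
        return 0;
    }

    int main(void)
    {
        uint64_t val;

        if (parse_size_mib("512", &val) == 0) {
            printf("%llu bytes\n", (unsigned long long)val);  /* 512 MiB */
        }
        return 0;
    }
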
*/ + perm = BLK_PERM_CONSISTENT_READ; + if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { + perm |= BLK_PERM_WRITE; + } + blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + ret = blk_insert_bs(blk, bs, errp); + if (ret < 0) { + goto fail; + } blk_set_enable_write_cache(blk, !writethrough); exp->refcount = 1; @@ -993,47 +993,47 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) /* Map the old options to the new flat type */ switch (opts->type) { - case NET_LEGACY_OPTIONS_KIND_NONE: + case NET_LEGACY_OPTIONS_TYPE_NONE: return 0; /* nothing to do */ - case NET_LEGACY_OPTIONS_KIND_NIC: + case NET_LEGACY_OPTIONS_TYPE_NIC: legacy.type = NET_CLIENT_DRIVER_NIC; - legacy.u.nic = *opts->u.nic.data; + legacy.u.nic = opts->u.nic; break; - case NET_LEGACY_OPTIONS_KIND_USER: + case NET_LEGACY_OPTIONS_TYPE_USER: legacy.type = NET_CLIENT_DRIVER_USER; - legacy.u.user = *opts->u.user.data; + legacy.u.user = opts->u.user; break; - case NET_LEGACY_OPTIONS_KIND_TAP: + case NET_LEGACY_OPTIONS_TYPE_TAP: legacy.type = NET_CLIENT_DRIVER_TAP; - legacy.u.tap = *opts->u.tap.data; + legacy.u.tap = opts->u.tap; break; - case NET_LEGACY_OPTIONS_KIND_L2TPV3: + case NET_LEGACY_OPTIONS_TYPE_L2TPV3: legacy.type = NET_CLIENT_DRIVER_L2TPV3; - legacy.u.l2tpv3 = *opts->u.l2tpv3.data; + legacy.u.l2tpv3 = opts->u.l2tpv3; break; - case NET_LEGACY_OPTIONS_KIND_SOCKET: + case NET_LEGACY_OPTIONS_TYPE_SOCKET: legacy.type = NET_CLIENT_DRIVER_SOCKET; - legacy.u.socket = *opts->u.socket.data; + legacy.u.socket = opts->u.socket; break; - case NET_LEGACY_OPTIONS_KIND_VDE: + case NET_LEGACY_OPTIONS_TYPE_VDE: legacy.type = NET_CLIENT_DRIVER_VDE; - legacy.u.vde = *opts->u.vde.data; + legacy.u.vde = opts->u.vde; break; - case NET_LEGACY_OPTIONS_KIND_DUMP: + case NET_LEGACY_OPTIONS_TYPE_DUMP: legacy.type = NET_CLIENT_DRIVER_DUMP; - legacy.u.dump = *opts->u.dump.data; + legacy.u.dump = opts->u.dump; break; - case NET_LEGACY_OPTIONS_KIND_BRIDGE: + case NET_LEGACY_OPTIONS_TYPE_BRIDGE: legacy.type = NET_CLIENT_DRIVER_BRIDGE; - legacy.u.bridge = *opts->u.bridge.data; + legacy.u.bridge = opts->u.bridge; break; - case NET_LEGACY_OPTIONS_KIND_NETMAP: + case NET_LEGACY_OPTIONS_TYPE_NETMAP: legacy.type = NET_CLIENT_DRIVER_NETMAP; - legacy.u.netmap = *opts->u.netmap.data; + legacy.u.netmap = opts->u.netmap; break; - case NET_LEGACY_OPTIONS_KIND_VHOST_USER: + case NET_LEGACY_OPTIONS_TYPE_VHOST_USER: legacy.type = NET_CLIENT_DRIVER_VHOST_USER; - legacy.u.vhost_user = *opts->u.vhost_user.data; + legacy.u.vhost_user = opts->u.vhost_user; break; default: abort(); @@ -1048,7 +1048,7 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) /* Do not add to a vlan if it's a nic with a netdev= parameter. */ if (netdev->type != NET_CLIENT_DRIVER_NIC || - !opts->u.nic.data->has_netdev) { + !opts->u.nic.has_netdev) { peer = net_hub_add_port(net->has_vlan ? 
net->vlan : 0, NULL); } diff --git a/net/vhost-user.c b/net/vhost-user.c index 77b8110f8c..e7e63408a1 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -190,7 +190,35 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, qemu_chr_fe_disconnect(&s->chr); - return FALSE; + return TRUE; +} + +static void net_vhost_user_event(void *opaque, int event); + +static void chr_closed_bh(void *opaque) +{ + const char *name = opaque; + NetClientState *ncs[MAX_QUEUE_NUM]; + VhostUserState *s; + Error *err = NULL; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER_NIC, + MAX_QUEUE_NUM); + assert(queues < MAX_QUEUE_NUM); + + s = DO_UPCAST(VhostUserState, nc, ncs[0]); + + qmp_set_link(name, false, &err); + vhost_user_stop(queues, ncs); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, + opaque, NULL, true); + + if (err) { + error_report_err(err); + } } static void net_vhost_user_event(void *opaque, int event) @@ -212,20 +240,31 @@ static void net_vhost_user_event(void *opaque, int event) trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, - net_vhost_user_watch, s); if (vhost_user_start(queues, ncs, &s->chr) < 0) { qemu_chr_fe_disconnect(&s->chr); return; } + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, + net_vhost_user_watch, s); qmp_set_link(name, true, &err); s->started = true; break; case CHR_EVENT_CLOSED: - qmp_set_link(name, false, &err); - vhost_user_stop(queues, ncs); - g_source_remove(s->watch); - s->watch = 0; + /* a close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear to idle. + * FIXME: better handle failure in vhost code, remove bh + */ + if (s->watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->watch); + s->watch = 0; + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque); + } break; } @@ -228,8 +228,8 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error **errp) } switch (object->type) { - case NUMA_OPTIONS_KIND_NODE: - numa_node_parse(object->u.node.data, opts, &err); + case NUMA_OPTIONS_TYPE_NODE: + numa_node_parse(&object->u.node, opts, &err); if (err) { goto end; } diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex 229b5af986..18666c9f2f 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex 9a9b0f0106..a394411fe5 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc Binary files differindex 95f1167261..4869c9dcf3 100644 --- a/pc-bios/openbios-ppc +++ b/pc-bios/openbios-ppc diff --git a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 Binary files differindex 675968ea62..aada55e094 100644 --- a/pc-bios/openbios-sparc32 +++ b/pc-bios/openbios-sparc32 diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 Binary files differindex d4b95326fe..cf466f6a4c 100644 --- a/pc-bios/openbios-sparc64 +++ b/pc-bios/openbios-sparc64 diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img Binary files differindex cf05bf0be2..2a4adfa654 100644 --- a/pc-bios/s390-ccw.img +++ b/pc-bios/s390-ccw.img diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c index 611102e3ef..b21c877b53 100644 --- a/pc-bios/s390-ccw/bootmap.c +++ 
b/pc-bios/s390-ccw/bootmap.c @@ -724,11 +724,17 @@ static void zipl_load_vscsi(void) void zipl_load(void) { - if (virtio_get_device()->is_cdrom) { + VDev *vdev = virtio_get_device(); + + if (vdev->is_cdrom) { ipl_iso_el_torito(); panic("\n! Cannot IPL this ISO image !\n"); } + if (virtio_get_device_type() == VIRTIO_ID_NET) { + jump_to_IPL_code(vdev->netboot_start_addr); + } + ipl_scsi(); switch (virtio_get_device_type()) { diff --git a/pc-bios/s390-ccw/iplb.h b/pc-bios/s390-ccw/iplb.h index 86abc56a90..890aed9ece 100644 --- a/pc-bios/s390-ccw/iplb.h +++ b/pc-bios/s390-ccw/iplb.h @@ -13,7 +13,8 @@ #define IPLB_H struct IplBlockCcw { - uint8_t reserved0[85]; + uint64_t netboot_start_addr; + uint8_t reserved0[77]; uint8_t ssid; uint16_t devno; uint8_t vm_flags; diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c index 345b848752..0946766d86 100644 --- a/pc-bios/s390-ccw/main.c +++ b/pc-bios/s390-ccw/main.c @@ -53,6 +53,12 @@ static bool find_dev(Schib *schib, int dev_no) if (!virtio_is_supported(blk_schid)) { continue; } + /* Skip net devices since no IPLB is created and therefore no + network bootloader has been loaded + */ + if (virtio_get_device_type() == VIRTIO_ID_NET && dev_no < 0) { + continue; + } if ((dev_no < 0) || (schib->pmcw.dev == dev_no)) { return true; } @@ -67,6 +73,7 @@ static void virtio_setup(void) int ssid; bool found = false; uint16_t dev_no; + VDev *vdev = virtio_get_device(); /* * We unconditionally enable mss support. In every sane configuration, @@ -85,9 +92,6 @@ static void virtio_setup(void) found = find_dev(&schib, dev_no); break; case S390_IPL_TYPE_QEMU_SCSI: - { - VDev *vdev = virtio_get_device(); - vdev->scsi_device_selected = true; vdev->selected_scsi_device.channel = iplb.scsi.channel; vdev->selected_scsi_device.target = iplb.scsi.target; @@ -95,7 +99,6 @@ static void virtio_setup(void) blk_schid.ssid = iplb.scsi.ssid & 0x3; found = find_dev(&schib, iplb.scsi.devno); break; - } default: panic("List-directed IPL not supported yet!\n"); } @@ -111,9 +114,14 @@ static void virtio_setup(void) IPL_assert(found, "No virtio device found"); - virtio_setup_device(blk_schid); + if (virtio_get_device_type() == VIRTIO_ID_NET) { + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = iplb.ccw.netboot_start_addr; + } else { + virtio_setup_device(blk_schid); - IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } } int main(void) diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index b333734955..6ee93d56db 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -585,6 +585,7 @@ bool virtio_is_supported(SubChannelId schid) switch (vdev.senseid.cu_model) { case VIRTIO_ID_BLOCK: case VIRTIO_ID_SCSI: + case VIRTIO_ID_NET: return true; } } diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h index eb35ea5faf..3388a423e5 100644 --- a/pc-bios/s390-ccw/virtio.h +++ b/pc-bios/s390-ccw/virtio.h @@ -276,6 +276,7 @@ struct VDev { uint8_t scsi_dev_heads; bool scsi_device_selected; ScsiDevice selected_scsi_device; + uint64_t netboot_start_addr; }; typedef struct VDev VDev; diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 9dadce2345..e6c42bd3c3 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex a89725c81c..915eba7c81 100644 --- a/pc-bios/vgabios-qxl.bin +++ 
b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex ea041412a2..40eca8c6d1 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex 71e22fc868..8b3abfa003 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex ad239cbfe8..6a90b945bd 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex 9947c2c26f..d3ed89d94b 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/qapi-schema.json b/qapi-schema.json index e9a6364b7d..fb39d1dc11 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1427,10 +1427,23 @@ # # @thread-id: ID of the underlying host thread # +# @poll-max-ns: maximum polling time in ns, 0 means polling is disabled +# (since 2.9) +# +# @poll-grow: how many ns will be added to polling time, 0 means that it's not +# configured (since 2.9) +# +# @poll-shrink: how many ns will be removed from polling time, 0 means that +# it's not configured (since 2.9) +# # Since: 2.0 ## { 'struct': 'IOThreadInfo', - 'data': {'id': 'str', 'thread-id': 'int'} } + 'data': {'id': 'str', + 'thread-id': 'int', + 'poll-max-ns': 'int', + 'poll-grow': 'int', + 'poll-shrink': 'int' } } ## # @query-iothreads: @@ -3959,6 +3972,15 @@ 'opts': 'NetLegacyOptions' } } ## +# @NetLegacyOptionsType: +# +# Since: 1.2 +## +{ 'enum': 'NetLegacyOptionsType', + 'data': ['none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', + 'dump', 'bridge', 'netmap', 'vhost-user'] } + +## # @NetLegacyOptions: # # Like Netdev, but for use only by the legacy command line options @@ -3966,6 +3988,8 @@ # Since: 1.2 ## { 'union': 'NetLegacyOptions', + 'base': { 'type': 'NetLegacyOptionsType' }, + 'discriminator': 'type', 'data': { 'none': 'NetdevNoneOptions', 'nic': 'NetLegacyNicOptions', @@ -4250,6 +4274,15 @@ # migration-safe, but allows tooling to get an insight and work with # model details. # +# Note: When a non-migration-safe CPU model is expanded in static mode, some +# features enabled by the CPU model may be omitted, because they can't be +# implemented by a static CPU model definition (e.g. cache info passthrough and +# PMU passthrough in x86). If you need an accurate representation of the +# features enabled by a non-migration-safe CPU model, use @full. If you need a +# static representation that will keep ABI compatibility even when changing QEMU +# version or machine-type, use @static (but keep in mind that some features may +# be omitted). +# # Since: 2.8.0 ## { 'enum': 'CpuModelExpansionType', @@ -5571,6 +5604,14 @@ 'events' : [ 'InputEvent' ] } } ## +# @NumaOptionsType: +# +# Since: 2.1 +## +{ 'enum': 'NumaOptionsType', + 'data': [ 'node' ] } + +## # @NumaOptions: # # A discriminated record of NUMA options. (for OptsVisitor) @@ -5578,6 +5619,8 @@ # Since: 2.1 ## { 'union': 'NumaOptions', + 'base': { 'type': 'NumaOptionsType' }, + 'discriminator': 'type', 'data': { 'node': 'NumaNodeOptions' }} @@ -5956,6 +5999,79 @@ { 'command': 'xen-load-devices-state', 'data': {'filename': 'str'} } ## +# @xen-set-replication: +# +# Enable or disable replication. +# +# @enable: true to enable, false to disable. +# +# @primary: true for primary or false for secondary. +# +# @failover: #optional true to do failover, false to stop. 
It cannot be +# specified if 'enable' is true. Default value is false. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-set-replication", +# "arguments": {"enable": true, "primary": false} } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-set-replication', + 'data': { 'enable': 'bool', 'primary': 'bool', '*failover' : 'bool' } } + +## +# @ReplicationStatus: +# +# The result format for 'query-xen-replication-status'. +# +# @error: true if an error happened, false if replication is normal. +# +# @desc: #optional the human readable error description string, when +# @error is 'true'. +# +# Since: 2.9 +## +{ 'struct': 'ReplicationStatus', + 'data': { 'error': 'bool', '*desc': 'str' } } + +## +# @query-xen-replication-status: +# +# Query replication status while the VM is running. +# +# Returns: A @ReplicationStatus object showing the status. +# +# Example: +# +# -> { "execute": "query-xen-replication-status" } +# <- { "return": { "error": false } } +# +# Since: 2.9 +## +{ 'command': 'query-xen-replication-status', + 'returns': 'ReplicationStatus' } + +## +# @xen-colo-do-checkpoint: +# +# Xen uses this command to notify replication to trigger a checkpoint. +# +# Returns: nothing. +# +# Example: +# +# -> { "execute": "xen-colo-do-checkpoint" } +# <- { "return": {} } +# +# Since: 2.9 +## +{ 'command': 'xen-colo-do-checkpoint' } + +## # @GICCapability: # # The struct describes capability for a specific GIC (Generic @@ -6081,3 +6197,23 @@ # ## { 'command': 'query-hotpluggable-cpus', 'returns': ['HotpluggableCPU'] } + +## +# @GuidInfo: +# +# GUID information. +# +# @guid: the globally unique identifier +# +# Since: 2.9 +## +{ 'struct': 'GuidInfo', 'data': {'guid': 'str'} } + +## +# @query-vm-generation-id: +# +# Show Virtual Machine Generation ID +# +# Since: 2.9 +## +{ 'command': 'query-vm-generation-id', 'returns': 'GuidInfo' } diff --git a/qapi/block-core.json b/qapi/block-core.json index 932f5bb3b4..bc0ccd615c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1304,6 +1304,11 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the commit job inserts into the graph +# above @top. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: Nothing on success # If commit or stream is already active on this device, DeviceInUse # If @device does not exist, DeviceNotFound @@ -1323,7 +1328,8 @@ ## { 'command': 'block-commit', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', '*top': 'str', - '*backing-file': 'str', '*speed': 'int' } } + '*backing-file': 'str', '*speed': 'int', + '*filter-node-name': 'str' } } ## # @drive-backup: @@ -1671,6 +1677,11 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # +# @filter-node-name: #optional the node name that should be assigned to the +# filter driver that the mirror job inserts into the graph +# above @device. If this option is not given, a node name is +# autogenerated. (Since: 2.9) +# # Returns: nothing on success.
# # Since: 2.6 @@ -1690,7 +1701,8 @@ 'sync': 'MirrorSyncMode', '*speed': 'int', '*granularity': 'uint32', '*buf-size': 'int', '*on-source-error': 'BlockdevOnError', - '*on-target-error': 'BlockdevOnError' } } + '*on-target-error': 'BlockdevOnError', + '*filter-node-name': 'str' } } ## # @block_set_io_throttle: @@ -2110,16 +2122,18 @@ # @nfs: Since 2.8 # @replication: Since 2.8 # @ssh: Since 2.8 +# @iscsi: Since 2.9 +# @rbd: Since 2.9 # # Since: 2.0 ## { 'enum': 'BlockdevDriver', 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'luks', 'nbd', 'nfs', 'null-aio', - 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', - 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk', 'vpc', - 'vvfat' ] } + 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', + 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', + 'quorum', 'raw', 'rbd', 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk', + 'vpc', 'vvfat' ] } ## # @BlockdevOptionsFile: @@ -2601,6 +2615,127 @@ '*logfile': 'str' } } ## +# @IscsiTransport: +# +# An enumeration of libiscsi transport types +# +# Since: 2.9 +## +{ 'enum': 'IscsiTransport', + 'data': [ 'tcp', 'iser' ] } + +## +# @IscsiHeaderDigest: +# +# An enumeration of header digests supported by libiscsi +# +# Since: 2.9 +## +{ 'enum': 'IscsiHeaderDigest', + 'prefix': 'QAPI_ISCSI_HEADER_DIGEST', + 'data': [ 'crc32c', 'none', 'crc32c-none', 'none-crc32c' ] } + +## +# @BlockdevOptionsIscsi: +# +# @transport: The iscsi transport type +# +# @portal: The address of the iscsi portal +# +# @target: The target iqn name +# +# @lun: #optional LUN to connect to. Defaults to 0. +# +# @user: #optional User name to log in with. If omitted, no CHAP +# authentication is performed. +# +# @password-secret: #optional The ID of a QCryptoSecret object providing +# the password for the login. This option is required if +# @user is specified. +# +# @initiator-name: #optional The iqn name we want to identify to the target +# as. If this option is not specified, an initiator name is +# generated automatically. +# +# @header-digest: #optional The desired header digest. Defaults to +# none-crc32c. +# +# @timeout: #optional Timeout in seconds after which a request will +# timeout. 0 means no timeout and is the default. +# +# Driver specific block device options for iscsi +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsIscsi', + 'data': { 'transport': 'IscsiTransport', + 'portal': 'str', + 'target': 'str', + '*lun': 'int', + '*user': 'str', + '*password-secret': 'str', + '*initiator-name': 'str', + '*header-digest': 'IscsiHeaderDigest', + '*timeout': 'int' } } + + +## +# @RbdAuthSupport: +# +# An enumeration of RBD auth support +# +# Since: 2.9 +## +{ 'enum': 'RbdAuthSupport', + 'data': [ 'cephx', 'none' ] } + + +## +# @RbdAuthMethod: +# +# An enumeration of rados auth_supported types +# +# Since: 2.9 +## +{ 'struct': 'RbdAuthMethod', + 'data': { 'auth': 'RbdAuthSupport' } } + +## +# @BlockdevOptionsRbd: +# +# @pool: Ceph pool name. +# +# @image: Image name in the Ceph pool. +# +# @conf: #optional path to Ceph configuration file. Values +# in the configuration file will be overridden by +# options specified via QAPI. +# +# @snapshot: #optional Ceph snapshot name. +# +# @user: #optional Ceph id name. +# +# @server: #optional Monitor host address and port. This maps +# to the "mon_host" Ceph option. +# +# @auth-supported: #optional Authentication supported. 
+# +# @password-secret: #optional The ID of a QCryptoSecret object providing +# the password for the login. +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsRbd', + 'data': { 'pool': 'str', + 'image': 'str', + '*conf': 'str', + '*snapshot': 'str', + '*user': 'str', + '*server': ['InetSocketAddress'], + '*auth-supported': ['RbdAuthMethod'], + '*password-secret': 'str' } } + +## # @ReplicationMode: # # An enumeration of replication modes. @@ -2786,7 +2921,7 @@ 'host_device':'BlockdevOptionsFile', 'http': 'BlockdevOptionsCurl', 'https': 'BlockdevOptionsCurl', -# TODO iscsi: Wait for structured options + 'iscsi': 'BlockdevOptionsIscsi', 'luks': 'BlockdevOptionsLUKS', 'nbd': 'BlockdevOptionsNbd', 'nfs': 'BlockdevOptionsNfs', @@ -2798,7 +2933,7 @@ 'qed': 'BlockdevOptionsGenericCOWFormat', 'quorum': 'BlockdevOptionsQuorum', 'raw': 'BlockdevOptionsRaw', -# TODO rbd: Wait for structured options + 'rbd': 'BlockdevOptionsRbd', 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options 'ssh': 'BlockdevOptionsSsh', @@ -2845,34 +2980,36 @@ # 1. # -> { "execute": "blockdev-add", # "arguments": { -# "options" : { "driver": "qcow2", -# "file": { "driver": "file", -# "filename": "test.qcow2" } } } } +# "driver": "qcow2", +# "node-name": "test1", +# "file": { +# "driver": "file", +# "filename": "test.qcow2" +# } +# } +# } # <- { "return": {} } # # 2. # -> { "execute": "blockdev-add", # "arguments": { -# "options": { -# "driver": "qcow2", -# "node-name": "node0", -# "discard": "unmap", -# "cache": { -# "direct": true, -# "writeback": true +# "driver": "qcow2", +# "node-name": "node0", +# "discard": "unmap", +# "cache": { +# "direct": true # }, # "file": { -# "driver": "file", -# "filename": "/tmp/test.qcow2" +# "driver": "file", +# "filename": "/tmp/test.qcow2" # }, # "backing": { -# "driver": "raw", -# "file": { -# "driver": "file", -# "filename": "/dev/fdset/4" +# "driver": "raw", +# "file": { +# "driver": "file", +# "filename": "/dev/fdset/4" # } # } -# } # } # } # @@ -2900,14 +3037,12 @@ # # -> { "execute": "blockdev-add", # "arguments": { -# "options": { -# "driver": "qcow2", -# "node-name": "node0", -# "file": { -# "driver": "file", -# "filename": "test.qcow2" -# } -# } +# "driver": "qcow2", +# "node-name": "node0", +# "file": { +# "driver": "file", +# "filename": "test.qcow2" +# } # } # } # <- { "return": {} } diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c index 1048bbc84e..a0a7c0e734 100644 --- a/qapi/opts-visitor.c +++ b/qapi/opts-visitor.c @@ -481,23 +481,20 @@ opts_type_size(Visitor *v, const char *name, uint64_t *obj, Error **errp) { OptsVisitor *ov = to_ov(v); const QemuOpt *opt; - int64_t val; - char *endptr; + int err; opt = lookup_scalar(ov, name, errp); if (!opt) { return; } - val = qemu_strtosz_suffix(opt->str ? opt->str : "", &endptr, - QEMU_STRTOSZ_DEFSUFFIX_B); - if (val < 0 || *endptr) { + err = qemu_strtosz(opt->str ? 
opt->str : "", NULL, obj); + if (err < 0) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, opt->name, - "a size value representible as a non-negative int64"); + "a size value"); return; } - *obj = val; processed(ov, name); } diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c index 505eb418ac..48bec2072b 100644 --- a/qapi/qmp-dispatch.c +++ b/qapi/qmp-dispatch.c @@ -28,14 +28,13 @@ static QDict *qmp_dispatch_check_obj(const QObject *request, Error **errp) bool has_exec_key = false; QDict *dict = NULL; - if (qobject_type(request) != QTYPE_QDICT) { + dict = qobject_to_qdict(request); + if (!dict) { error_setg(errp, QERR_QMP_BAD_INPUT_OBJECT, "request is not a dictionary"); return NULL; } - dict = qobject_to_qdict(request); - for (ent = qdict_first(dict); ent; ent = qdict_next(dict, ent)) { arg_name = qdict_entry_key(ent); diff --git a/qdev-monitor.c b/qdev-monitor.c index 549f45f066..5f2fcdfc45 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -29,7 +29,6 @@ #include "qemu/error-report.h" #include "qemu/help_option.h" #include "sysemu/block-backend.h" -#include "migration/migration.h" /* * Aliases were a bad idea from the start. Let's keep them @@ -579,14 +578,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) return NULL; } - if (only_migratable) { - if (dc->vmsd->unmigratable) { - error_setg(errp, "Device %s is not migratable, but " - "--only-migratable was specified", driver); - return NULL; - } - } - /* find bus */ path = qemu_opt_get(opts, "bus"); if (path != NULL) { diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index f054599a91..9c9702cc62 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -40,9 +40,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [--object objectdef] [--image-opts] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename") STEXI -@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("dd", img_dd, diff --git a/qemu-img.c b/qemu-img.c index cff22e3005..98b836b030 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -156,6 +156,11 @@ static void QEMU_NORETURN help(void) " kinds of errors, with a higher risk of choosing the wrong fix or\n" " hiding corruption that has already occurred.\n" "\n" + "Parameters to convert subcommand:\n" + " '-m' specifies how many coroutines work in parallel during the convert\n" + " process (defaults to 8)\n" + " '-W' allow to write to the target out of order rather than sequential\n" + "\n" "Parameters to snapshot subcommand:\n" " 'snapshot' is the name of the snapshot to create, apply or delete\n" " '-a' applies a snapshot (revert 
disk to saved state)\n" @@ -368,6 +373,21 @@ static int add_old_style_options(const char *fmt, QemuOpts *opts, return 0; } +static int64_t cvtnum(const char *s) +{ + int err; + uint64_t value; + + err = qemu_strtosz(s, NULL, &value); + if (err < 0) { + return err; + } + if (value > INT64_MAX) { + return -ERANGE; + } + return value; +} + static int img_create(int argc, char **argv) { int c; @@ -461,10 +481,9 @@ static int img_create(int argc, char **argv) /* Get image size, if specified */ if (optind < argc) { int64_t sval; - char *end; - sval = qemu_strtosz_suffix(argv[optind++], &end, - QEMU_STRTOSZ_DEFSUFFIX_B); - if (sval < 0 || *end) { + + sval = cvtnum(argv[optind++]); + if (sval < 0) { if (sval == -ERANGE) { error_report("Image size must be less than 8 EiB!"); } else { @@ -795,6 +814,8 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + /* FIXME In error cases, the job simply goes away and we access a dangling + * pointer below. */ aio_context_acquire(aio_context); do { aio_poll(aio_context, true); @@ -816,6 +837,7 @@ static int img_commit(int argc, char **argv) const char *filename, *fmt, *cache, *base; BlockBackend *blk; BlockDriverState *bs, *base_bs; + BlockJob *job; bool progress = false, quiet = false, drop = false; bool writethrough; Error *local_err = NULL; @@ -936,8 +958,8 @@ static int img_commit(int argc, char **argv) aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, - BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, - &local_err, false); + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, &local_err, false); aio_context_release(aio_context); if (local_err) { goto done; @@ -951,7 +973,8 @@ static int img_commit(int argc, char **argv) bdrv_ref(bs); } - run_block_job(bs->job, &local_err); + job = block_job_get("commit"); + run_block_job(job, &local_err); if (local_err) { goto unref_backing; } @@ -1448,48 +1471,61 @@ enum ImgConvertBlockStatus { BLK_BACKING_FILE, }; +#define MAX_COROUTINES 16 + typedef struct ImgConvertState { BlockBackend **src; int64_t *src_sectors; - int src_cur, src_num; - int64_t src_cur_offset; + int src_num; int64_t total_sectors; int64_t allocated_sectors; + int64_t allocated_done; + int64_t sector_num; + int64_t wr_offs; enum ImgConvertBlockStatus status; int64_t sector_next_status; BlockBackend *target; bool has_zero_init; bool compressed; bool target_has_backing; + bool wr_in_order; int min_sparse; size_t cluster_sectors; size_t buf_sectors; + int num_coroutines; + int running_coroutines; + Coroutine *co[MAX_COROUTINES]; + int64_t wait_sector_num[MAX_COROUTINES]; + CoMutex lock; + int ret; } ImgConvertState; -static void convert_select_part(ImgConvertState *s, int64_t sector_num) +static void convert_select_part(ImgConvertState *s, int64_t sector_num, + int *src_cur, int64_t *src_cur_offset) { - assert(sector_num >= s->src_cur_offset); - while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) { - s->src_cur_offset += s->src_sectors[s->src_cur]; - s->src_cur++; - assert(s->src_cur < s->src_num); + *src_cur = 0; + *src_cur_offset = 0; + while (sector_num - *src_cur_offset >= s->src_sectors[*src_cur]) { + *src_cur_offset += s->src_sectors[*src_cur]; + (*src_cur)++; + assert(*src_cur < s->src_num); } } static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) { - int64_t ret; - int n; + int64_t ret, src_cur_offset; + int n, src_cur; - 
convert_select_part(s, sector_num); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); assert(s->total_sectors > sector_num); n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); if (s->sector_next_status <= sector_num) { BlockDriverState *file; - ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]), - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status(blk_bs(s->src[src_cur]), + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1505,8 +1541,8 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) /* Check block status of the backing file chain to avoid * needlessly reading zeroes and limiting the iteration to the * buffer size */ - ret = bdrv_get_block_status_above(blk_bs(s->src[s->src_cur]), NULL, - sector_num - s->src_cur_offset, + ret = bdrv_get_block_status_above(blk_bs(s->src[src_cur]), NULL, + sector_num - src_cur_offset, n, &n, &file); if (ret < 0) { return ret; @@ -1544,28 +1580,34 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num) return n; } -static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, - uint8_t *buf) +static int coroutine_fn convert_co_read(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf) { - int n; - int ret; + int n, ret; + QEMUIOVector qiov; + struct iovec iov; assert(nb_sectors <= s->buf_sectors); while (nb_sectors > 0) { BlockBackend *blk; - int64_t bs_sectors; + int src_cur; + int64_t bs_sectors, src_cur_offset; /* In the case of compression with multiple source files, we can get a * nb_sectors that spreads into the next part. So we must be able to * read across multiple BDSes for one convert_read() call. */ - convert_select_part(s, sector_num); - blk = s->src[s->src_cur]; - bs_sectors = s->src_sectors[s->src_cur]; - - n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset)); - ret = blk_pread(blk, - (sector_num - s->src_cur_offset) << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + convert_select_part(s, sector_num, &src_cur, &src_cur_offset); + blk = s->src[src_cur]; + bs_sectors = s->src_sectors[src_cur]; + + n = MIN(nb_sectors, bs_sectors - (sector_num - src_cur_offset)); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_preadv( + blk, (sector_num - src_cur_offset) << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1578,15 +1620,18 @@ static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, - const uint8_t *buf) + +static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num, + int nb_sectors, uint8_t *buf, + enum ImgConvertBlockStatus status) { int ret; + QEMUIOVector qiov; + struct iovec iov; while (nb_sectors > 0) { int n = nb_sectors; - - switch (s->status) { + switch (status) { case BLK_BACKING_FILE: /* If we have a backing file, leave clusters unallocated that are * unallocated in the source image, so that the backing file is @@ -1607,9 +1652,13 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_pwrite_compressed(s->target, - sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << 
BDRV_SECTOR_BITS, &qiov, + BDRV_REQ_WRITE_COMPRESSED); if (ret < 0) { return ret; } @@ -1622,8 +1671,12 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (!s->min_sparse || is_allocated_sectors_min(buf, n, &n, s->min_sparse)) { - ret = blk_pwrite(s->target, sector_num << BDRV_SECTOR_BITS, - buf, n << BDRV_SECTOR_BITS, 0); + iov.iov_base = buf; + iov.iov_len = n << BDRV_SECTOR_BITS; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = blk_co_pwritev(s->target, sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, &qiov, 0); if (ret < 0) { return ret; } @@ -1635,8 +1688,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, if (s->has_zero_init) { break; } - ret = blk_pwrite_zeroes(s->target, sector_num << BDRV_SECTOR_BITS, - n << BDRV_SECTOR_BITS, 0); + ret = blk_co_pwrite_zeroes(s->target, + sector_num << BDRV_SECTOR_BITS, + n << BDRV_SECTOR_BITS, 0); if (ret < 0) { return ret; } @@ -1651,12 +1705,122 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, return 0; } -static int convert_do_copy(ImgConvertState *s) +static void coroutine_fn convert_co_do_copy(void *opaque) { + ImgConvertState *s = opaque; uint8_t *buf = NULL; - int64_t sector_num, allocated_done; - int ret; - int n; + int ret, i; + int index = -1; + + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] == qemu_coroutine_self()) { + index = i; + break; + } + } + assert(index >= 0); + + s->running_coroutines++; + buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); + + while (1) { + int n; + int64_t sector_num; + enum ImgConvertBlockStatus status; + + qemu_co_mutex_lock(&s->lock); + if (s->ret != -EINPROGRESS || s->sector_num >= s->total_sectors) { + qemu_co_mutex_unlock(&s->lock); + goto out; + } + n = convert_iteration_sectors(s, s->sector_num); + if (n < 0) { + qemu_co_mutex_unlock(&s->lock); + s->ret = n; + goto out; + } + /* save current sector and allocation status to local variables */ + sector_num = s->sector_num; + status = s->status; + if (!s->min_sparse && s->status == BLK_ZERO) { + n = MIN(n, s->buf_sectors); + } + /* increment global sector counter so that other coroutines can + * already continue reading beyond this request */ + s->sector_num += n; + qemu_co_mutex_unlock(&s->lock); + + if (status == BLK_DATA || (!s->min_sparse && status == BLK_ZERO)) { + s->allocated_done += n; + qemu_progress_print(100.0 * s->allocated_done / + s->allocated_sectors, 0); + } + + if (status == BLK_DATA) { + ret = convert_co_read(s, sector_num, n, buf); + if (ret < 0) { + error_report("error while reading sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + } else if (!s->min_sparse && status == BLK_ZERO) { + status = BLK_DATA; + memset(buf, 0x00, n * BDRV_SECTOR_SIZE); + } + + if (s->wr_in_order) { + /* keep writes in order */ + while (s->wr_offs != sector_num) { + if (s->ret != -EINPROGRESS) { + goto out; + } + s->wait_sector_num[index] = sector_num; + qemu_coroutine_yield(); + } + s->wait_sector_num[index] = -1; + } + + ret = convert_co_write(s, sector_num, n, buf, status); + if (ret < 0) { + error_report("error while writing sector %" PRId64 + ": %s", sector_num, strerror(-ret)); + s->ret = ret; + goto out; + } + + if (s->wr_in_order) { + /* reenter the coroutine that might have waited + * for this write to complete */ + s->wr_offs = sector_num + n; + for (i = 0; i < s->num_coroutines; i++) { + if (s->co[i] && s->wait_sector_num[i] == s->wr_offs) { + /* + * A -> B -> A 
cannot occur because A has + * s->wait_sector_num[i] == -1 during A -> B. Therefore + * B will never enter A during this time window. + */ + qemu_coroutine_enter(s->co[i]); + break; + } + } + } + } + +out: + qemu_vfree(buf); + s->co[index] = NULL; + s->running_coroutines--; + if (!s->running_coroutines && s->ret == -EINPROGRESS) { + /* the convert job finished successfully */ + s->ret = 0; + } +} + +static int convert_do_copy(ImgConvertState *s) +{ + int ret, i, n; + int64_t sector_num = 0; /* Check whether we have zero initialisation or can get it efficiently */ s->has_zero_init = s->min_sparse && !s->target_has_backing @@ -1677,21 +1841,15 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) { error_report("invalid cluster size"); - ret = -EINVAL; - goto fail; + return -EINVAL; } s->buf_sectors = s->cluster_sectors; } - buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE); - /* Calculate allocated sectors for progress */ - s->allocated_sectors = 0; - sector_num = 0; while (sector_num < s->total_sectors) { n = convert_iteration_sectors(s, sector_num); if (n < 0) { - ret = n; - goto fail; + return n; } if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) { @@ -1701,61 +1859,29 @@ static int convert_do_copy(ImgConvertState *s) } /* Do the copy */ - s->src_cur = 0; - s->src_cur_offset = 0; s->sector_next_status = 0; + s->ret = -EINPROGRESS; - sector_num = 0; - allocated_done = 0; - - while (sector_num < s->total_sectors) { - n = convert_iteration_sectors(s, sector_num); - if (n < 0) { - ret = n; - goto fail; - } - if (s->status == BLK_DATA || (!s->min_sparse && s->status == BLK_ZERO)) - { - allocated_done += n; - qemu_progress_print(100.0 * allocated_done / s->allocated_sectors, - 0); - } - - if (s->status == BLK_DATA) { - ret = convert_read(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while reading sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } - } else if (!s->min_sparse && s->status == BLK_ZERO) { - n = MIN(n, s->buf_sectors); - memset(buf, 0, n * BDRV_SECTOR_SIZE); - s->status = BLK_DATA; - } - - ret = convert_write(s, sector_num, n, buf); - if (ret < 0) { - error_report("error while writing sector %" PRId64 - ": %s", sector_num, strerror(-ret)); - goto fail; - } + qemu_co_mutex_init(&s->lock); + for (i = 0; i < s->num_coroutines; i++) { + s->co[i] = qemu_coroutine_create(convert_co_do_copy, s); + s->wait_sector_num[i] = -1; + qemu_coroutine_enter(s->co[i]); + } - sector_num += n; + while (s->ret == -EINPROGRESS) { + main_loop_wait(false); } - if (s->compressed) { + if (s->compressed && !s->ret) { /* signal EOF to align */ ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { - goto fail; + return ret; } } - ret = 0; -fail: - qemu_vfree(buf); - return ret; + return s->ret; } static int img_convert(int argc, char **argv) @@ -1783,6 +1909,8 @@ static int img_convert(int argc, char **argv) QemuOpts *sn_opts = NULL; ImgConvertState state; bool image_opts = false; + bool wr_in_order = true; + long num_coroutines = 8; fmt = NULL; out_fmt = "raw"; @@ -1798,7 +1926,7 @@ static int img_convert(int argc, char **argv) {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, {0, 0, 0, 0} }; - c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qn", + c = getopt_long(argc, argv, "hf:O:B:ce6o:s:l:S:pt:T:qnm:W", long_options, NULL); if (c == -1) { break; @@ -1864,9 +1992,9 @@ static int img_convert(int argc, char **argv) case 'S': { 
int64_t sval; - char *end; - sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); - if (sval < 0 || *end) { + + sval = cvtnum(optarg); + if (sval < 0) { error_report("Invalid minimum zero buffer size for sparse output specified"); ret = -1; goto fail_getopt; @@ -1890,6 +2018,18 @@ static int img_convert(int argc, char **argv) case 'n': skip_create = 1; break; + case 'm': + if (qemu_strtol(optarg, NULL, 0, &num_coroutines) || + num_coroutines < 1 || num_coroutines > MAX_COROUTINES) { + error_report("Invalid number of coroutines. Allowed number of" + " coroutines is between 1 and %d", MAX_COROUTINES); + ret = -1; + goto fail_getopt; + } + break; + case 'W': + wr_in_order = false; + break; case OPTION_OBJECT: opts = qemu_opts_parse_noisily(&qemu_object_opts, optarg, true); @@ -1909,6 +2049,12 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } + if (!wr_in_order && compress) { + error_report("Out of order write and compress are mutually exclusive"); + ret = -1; + goto fail_getopt; + } + /* Initialize before goto out */ if (quiet) { progress = 0; @@ -2149,6 +2295,8 @@ static int img_convert(int argc, char **argv) .min_sparse = min_sparse, .cluster_sectors = cluster_sectors, .buf_sectors = bufsectors, + .wr_in_order = wr_in_order, + .num_coroutines = num_coroutines, }; ret = convert_do_copy(&state); @@ -3275,7 +3423,7 @@ static int img_resize(int argc, char **argv) qemu_opts_del(param); blk = img_open(image_opts, filename, fmt, - BDRV_O_RDWR, false, quiet); + BDRV_O_RDWR | BDRV_O_RESIZE, false, quiet); if (!blk) { ret = -1; goto out; @@ -3651,11 +3799,8 @@ static int img_bench(int argc, char **argv) break; case 'o': { - char *end; - errno = 0; - offset = qemu_strtosz_suffix(optarg, &end, - QEMU_STRTOSZ_DEFSUFFIX_B); - if (offset < 0|| *end) { + offset = cvtnum(optarg); + if (offset < 0) { error_report("Invalid offset specified"); return 1; } @@ -3668,10 +3813,9 @@ static int img_bench(int argc, char **argv) case 's': { int64_t sval; - char *end; - sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); - if (sval < 0 || sval > INT_MAX || *end) { + sval = cvtnum(optarg); + if (sval < 0 || sval > INT_MAX) { error_report("Invalid buffer size specified"); return 1; } @@ -3682,10 +3826,9 @@ static int img_bench(int argc, char **argv) case 'S': { int64_t sval; - char *end; - sval = qemu_strtosz_suffix(optarg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); - if (sval < 0 || sval > INT_MAX || *end) { + sval = cvtnum(optarg); + if (sval < 0 || sval > INT_MAX) { error_report("Invalid step size specified"); return 1; } @@ -3844,12 +3987,11 @@ static int img_dd_bs(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { - char *end; int64_t res; - res = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + res = cvtnum(arg); - if (res <= 0 || res > INT_MAX || *end) { + if (res <= 0 || res > INT_MAX) { error_report("invalid number: '%s'", arg); return 1; } @@ -3862,11 +4004,9 @@ static int img_dd_count(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { - char *end; - - dd->count = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + dd->count = cvtnum(arg); - if (dd->count < 0 || *end) { + if (dd->count < 0) { error_report("invalid number: '%s'", arg); return 1; } @@ -3896,11 +4036,9 @@ static int img_dd_skip(const char *arg, struct DdIo *in, struct DdIo *out, struct DdInfo *dd) { - char *end; - - in->offset = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + in->offset = cvtnum(arg); - if (in->offset < 0 || *end) { + 
if (in->offset < 0) { error_report("invalid number: '%s'", arg); return 1; } diff --git a/qemu-img.texi b/qemu-img.texi index 174aae38b7..c81db3e81c 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -137,6 +137,12 @@ Parameters to convert subcommand: @item -n Skip the creation of the target volume +@item -m +Number of parallel coroutines for the convert process +@item -W +Allow out-of-order writes to the destination. This option improves performance, +but is only recommended for preallocated devices like host devices or other +raw block devices. @end table Parameters to dd subcommand: @@ -296,7 +302,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-m @var{num_coroutines}] [-W] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_param}(@var{snapshot_id_or_name} is deprecated) to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -326,6 +332,14 @@ skipped. This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +Out of order writes can be enabled with @code{-W} to improve performance. +This is only recommended for preallocated devices like host devices or other +raw block devices. Out of order write does not work in combination with +creating compressed images. + +@var{num_coroutines} specifies how many coroutines work in parallel during +the convert process (defaults to 8). + @item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} Dd copies from @var{input} file to @var{output} file converting it from diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index e415b03cd0..2c48f9ce1a 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -83,6 +83,29 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc, } return 0; } + + /* Request additional permissions if necessary for this command. The caller + * is responsible for restoring the original permissions afterwards if this + * is what it wants. 
*/ + if (ct->perm && blk_is_available(blk)) { + uint64_t orig_perm, orig_shared_perm; + blk_get_perm(blk, &orig_perm, &orig_shared_perm); + + if (ct->perm & ~orig_perm) { + uint64_t new_perm; + Error *local_err = NULL; + int ret; + + new_perm = orig_perm | ct->perm; + + ret = blk_set_perm(blk, new_perm, orig_shared_perm, &local_err); + if (ret < 0) { + error_report_err(local_err); + return 0; + } + } + } + optind = 0; return ct->cfunc(blk, argc, argv); } @@ -137,15 +160,17 @@ static char **breakline(char *input, int *count) static int64_t cvtnum(const char *s) { - char *end; - int64_t ret; + int err; + uint64_t value; - ret = qemu_strtosz_suffix(s, &end, QEMU_STRTOSZ_DEFSUFFIX_B); - if (*end != '\0') { - /* Detritus at the end of the string */ - return -EINVAL; + err = qemu_strtosz(s, NULL, &value); + if (err < 0) { + return err; + } + if (value > INT64_MAX) { + return -ERANGE; } - return ret; + return value; } static void print_cvtnum_err(int64_t rc, const char *arg) @@ -916,6 +941,7 @@ static const cmdinfo_t write_cmd = { .name = "write", .altname = "w", .cfunc = write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-bcCfquz] [-P pattern] off len", @@ -1091,6 +1117,7 @@ static int writev_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t writev_cmd = { .name = "writev", .cfunc = writev_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfq] [-P pattern] off len [len..]", @@ -1390,6 +1417,7 @@ static int aio_write_f(BlockBackend *blk, int argc, char **argv); static const cmdinfo_t aio_write_cmd = { .name = "aio_write", .cfunc = aio_write_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cfiquz] [-P pattern] off len [len..]", @@ -1554,6 +1582,7 @@ static const cmdinfo_t truncate_cmd = { .name = "truncate", .altname = "t", .cfunc = truncate_f, + .perm = BLK_PERM_WRITE | BLK_PERM_RESIZE, .argmin = 1, .argmax = 1, .args = "off", @@ -1651,6 +1680,7 @@ static const cmdinfo_t discard_cmd = { .name = "discard", .altname = "d", .cfunc = discard_f, + .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, .args = "[-Cq] off len", diff --git a/qemu-options.hx b/qemu-options.hx index 1e815ef520..c85f77d1d8 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -95,6 +95,26 @@ STEXI Select CPU model (@code{-cpu help} for list and additional feature selection) ETEXI +DEF("accel", HAS_ARG, QEMU_OPTION_accel, + "-accel [accel=]accelerator[,thread=single|multi]\n" + " select accelerator ('-accel help' for list)\n" + " thread=single|multi (enable multi-threaded TCG)", QEMU_ARCH_ALL) +STEXI +@item -accel @var{name}[,prop=@var{value}[,...]] +@findex -accel +This is used to enable an accelerator. Depending on the target architecture, +kvm, xen, or tcg can be available. By default, tcg is used. If there is more +than one accelerator specified, the next one is used if the previous one fails +to initialize. +@table @option +@item thread=single|multi +Controls the number of TCG threads. When the TCG is multi-threaded there will be one +thread per vCPU, therefore taking advantage of additional host cores. The default +is to enable multi-threading where both the back-end and front-ends support it and +no incompatible TCG features have been enabled (e.g. icount/replay). 
+@end table +ETEXI + DEF("smp", HAS_ARG, QEMU_OPTION_smp, "-smp [cpus=]n[,maxcpus=cpus][,cores=cores][,threads=threads][,sockets=sockets]\n" " set the number of CPUs to 'n' [default=1]\n" @@ -724,7 +744,12 @@ ETEXI DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" - " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n", + " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd]\n" + " [[,throttling.bps-total=b]|[[,throttling.bps-read=r][,throttling.bps-write=w]]]\n" + " [[,throttling.iops-total=i]|[[,throttling.iops-read=r][,throttling.iops-write=w]]]\n" + " [[,throttling.bps-total-max=bm]|[[,throttling.bps-read-max=rm][,throttling.bps-write-max=wm]]]\n" + " [[,throttling.iops-total-max=im]|[[,throttling.iops-read-max=irm][,throttling.iops-write-max=iwm]]]\n" + " [[,throttling.iops-size=is]]\n", QEMU_ARCH_ALL) STEXI @@ -2126,7 +2151,7 @@ Example: @example qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ -numa node,memdev=mem \ - -chardev socket,path=/path/to/socket \ + -chardev socket,id=chr0,path=/path/to/socket \ -netdev type=vhost-user,id=net0,chardev=chr0 \ -device virtio-net-pci,netdev=net0 @end example @@ -2586,7 +2611,7 @@ Example qemu-system-i386 --drive file=sheepdog://192.0.2.1:30000/MyVirtualMachine @end example -See also @url{http://http://www.osrg.net/sheepdog/}. +See also @url{https://sheepdog.github.io/sheepdog/}. @item GlusterFS GlusterFS is a user space distributed file system. diff --git a/qobject/qdict.c b/qobject/qdict.c index 197b0fbd47..291eef1a19 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -178,20 +178,6 @@ size_t qdict_size(const QDict *qdict) } /** - * qdict_get_obj(): Get a QObject of a specific type - */ -static QObject *qdict_get_obj(const QDict *qdict, const char *key, QType type) -{ - QObject *obj; - - obj = qdict_get(qdict, key); - assert(obj != NULL); - assert(qobject_type(obj) == type); - - return obj; -} - -/** * qdict_get_double(): Get an number mapped by 'key' * * This function assumes that 'key' exists and it stores a @@ -241,25 +227,15 @@ bool qdict_get_bool(const QDict *qdict, const char *key) } /** - * qdict_get_qlist(): Get the QList mapped by 'key' - * - * This function assumes that 'key' exists and it stores a - * QList object. - * - * Return QList mapped by 'key'. + * qdict_get_qlist(): If @qdict maps @key to a QList, return it, else NULL. */ QList *qdict_get_qlist(const QDict *qdict, const char *key) { - return qobject_to_qlist(qdict_get_obj(qdict, key, QTYPE_QLIST)); + return qobject_to_qlist(qdict_get(qdict, key)); } /** - * qdict_get_qdict(): Get the QDict mapped by 'key' - * - * This function assumes that 'key' exists and it stores a - * QDict object. - * - * Return QDict mapped by 'key'. + * qdict_get_qdict(): If @qdict maps @key to a QDict, return it, else NULL. 
*/ QDict *qdict_get_qdict(const QDict *qdict, const char *key) { @@ -767,7 +743,7 @@ static int qdict_is_list(QDict *maybe_list, Error **errp) for (ent = qdict_first(maybe_list); ent != NULL; ent = qdict_next(maybe_list, ent)) { - if (qemu_strtoll(ent->key, NULL, 10, &val) == 0) { + if (qemu_strtoi64(ent->key, NULL, 10, &val) == 0) { if (is_list == -1) { is_list = 1; } else if (!is_list) { @@ -113,9 +113,19 @@ static void cpu_common_get_memory_mapping(CPUState *cpu, error_setg(errp, "Obtaining memory mappings is unsupported on this CPU."); } +/* Resetting the IRQ comes from across the code base so we take the + * BQL here if we need to. cpu_interrupt assumes it is held.*/ void cpu_reset_interrupt(CPUState *cpu, int mask) { + bool need_lock = !qemu_mutex_iothread_locked(); + + if (need_lock) { + qemu_mutex_lock_iothread(); + } cpu->interrupt_request &= ~mask; + if (need_lock) { + qemu_mutex_unlock_iothread(); + } } void cpu_exit(CPUState *cpu) @@ -240,6 +240,7 @@ static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr, va_start(ap, fmt); buffer = g_strdup_vprintf(fmt, ap); qtest_send(chr, buffer); + g_free(buffer); va_end(ap); } @@ -373,8 +374,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) uint64_t value; g_assert(words[1] && words[2]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &value) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &value) == 0); if (words[0][5] == 'b') { uint8_t data = value; @@ -402,7 +403,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words) uint64_t value = UINT64_C(-1); g_assert(words[1]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); if (words[0][4] == 'b') { uint8_t data; @@ -428,8 +429,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) char *enc; g_assert(words[1] && words[2]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &len) == 0); /* We'd send garbage to libqtest if len is 0 */ g_assert(len); @@ -452,8 +453,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) gchar *b64_data; g_assert(words[1] && words[2]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &len) == 0); data = g_malloc(len); cpu_physical_memory_read(addr, data, len); @@ -469,8 +470,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) size_t data_len; g_assert(words[1] && words[2] && words[3]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &len) == 0); data_len = strlen(words[3]); if (data_len < 3) { @@ -498,8 +499,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) unsigned long pattern; g_assert(words[1] && words[2] && words[3]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &len) == 0); 
g_assert(qemu_strtoul(words[3], NULL, 0, &pattern) == 0); if (len) { @@ -518,8 +519,8 @@ static void qtest_process_command(CharBackend *chr, gchar **words) gsize out_len; g_assert(words[1] && words[2] && words[3]); - g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); - g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtou64(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtou64(words[2], NULL, 0, &len) == 0); data_len = strlen(words[3]); if (data_len < 3) { @@ -552,9 +553,9 @@ static void qtest_process_command(CharBackend *chr, gchar **words) unsigned long nargs, nret; g_assert(qemu_strtoul(words[2], NULL, 0, &nargs) == 0); - g_assert(qemu_strtoull(words[3], NULL, 0, &args) == 0); + g_assert(qemu_strtou64(words[3], NULL, 0, &args) == 0); g_assert(qemu_strtoul(words[4], NULL, 0, &nret) == 0); - g_assert(qemu_strtoull(words[5], NULL, 0, &ret) == 0); + g_assert(qemu_strtou64(words[5], NULL, 0, &ret) == 0); res = qtest_rtas_call(words[1], nargs, args, nret, ret); qtest_send_prefix(chr); @@ -564,7 +565,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words) int64_t ns; if (words[1]) { - g_assert(qemu_strtoll(words[1], NULL, 0, &ns) == 0); + g_assert(qemu_strtoi64(words[1], NULL, 0, &ns) == 0); } else { ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); } @@ -576,7 +577,7 @@ static void qtest_process_command(CharBackend *chr, gchar **words) int64_t ns; g_assert(words[1]); - g_assert(qemu_strtoll(words[1], NULL, 0, &ns) == 0); + g_assert(qemu_strtoi64(words[1], NULL, 0, &ns) == 0); qtest_clock_warp(ns); qtest_send_prefix(chr); qtest_sendf(chr, "OK %"PRIi64"\n", diff --git a/replay/Makefile.objs b/replay/Makefile.objs index b2afd4030a..cee6539a23 100644 --- a/replay/Makefile.objs +++ b/replay/Makefile.objs @@ -6,3 +6,4 @@ common-obj-y += replay-input.o common-obj-y += replay-char.o common-obj-y += replay-snapshot.o common-obj-y += replay-net.o +common-obj-y += replay-audio.o
\ No newline at end of file diff --git a/replay/replay-audio.c b/replay/replay-audio.c new file mode 100644 index 0000000000..3d837434d4 --- /dev/null +++ b/replay/replay-audio.c @@ -0,0 +1,79 @@ +/* + * replay-audio.c + * + * Copyright (c) 2010-2017 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "sysemu/sysemu.h" +#include "audio/audio.h" + +void replay_audio_out(int *played) +{ + if (replay_mode == REPLAY_MODE_RECORD) { + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_AUDIO_OUT); + replay_put_dword(*played); + replay_mutex_unlock(); + } else if (replay_mode == REPLAY_MODE_PLAY) { + replay_account_executed_instructions(); + replay_mutex_lock(); + if (replay_next_event_is(EVENT_AUDIO_OUT)) { + *played = replay_get_dword(); + replay_finish_event(); + replay_mutex_unlock(); + } else { + replay_mutex_unlock(); + error_report("Missing audio out event in the replay log"); + abort(); + } + } +} + +void replay_audio_in(int *recorded, void *samples, int *wpos, int size) +{ + int pos; + uint64_t left, right; + if (replay_mode == REPLAY_MODE_RECORD) { + replay_save_instructions(); + replay_mutex_lock(); + replay_put_event(EVENT_AUDIO_IN); + replay_put_dword(*recorded); + replay_put_dword(*wpos); + for (pos = (*wpos - *recorded + size) % size ; pos != *wpos + ; pos = (pos + 1) % size) { + audio_sample_to_uint64(samples, pos, &left, &right); + replay_put_qword(left); + replay_put_qword(right); + } + replay_mutex_unlock(); + } else if (replay_mode == REPLAY_MODE_PLAY) { + replay_account_executed_instructions(); + replay_mutex_lock(); + if (replay_next_event_is(EVENT_AUDIO_IN)) { + *recorded = replay_get_dword(); + *wpos = replay_get_dword(); + for (pos = (*wpos - *recorded + size) % size ; pos != *wpos + ; pos = (pos + 1) % size) { + left = replay_get_qword(); + right = replay_get_qword(); + audio_sample_from_uint64(samples, pos, left, right); + } + replay_finish_event(); + replay_mutex_unlock(); + } else { + replay_mutex_unlock(); + error_report("Missing audio in event in the replay log"); + abort(); + } + } +} diff --git a/replay/replay-internal.h b/replay/replay-internal.h index c26d0795f2..ed66ed803c 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -29,6 +29,10 @@ enum ReplayEvents { /* for character device read all event */ EVENT_CHAR_READ_ALL, EVENT_CHAR_READ_ALL_ERROR, + /* for audio out event */ + EVENT_AUDIO_OUT, + /* for audio in event */ + EVENT_AUDIO_IN, /* for clock read/writes */ /* some of greater codes are reserved for clocks */ EVENT_CLOCK, diff --git a/roms/openbios b/roms/openbios -Subproject ef8a14e8afb47635c9c5f7524a52c3251827e29 +Subproject 0cd97cc904e71fbb461112f6756934ec6af890b diff --git a/roms/seabios b/roms/seabios -Subproject 8891697e3f7d84355420573efd98e94f1473676 +Subproject 5f4c7b13cdf9c450eb55645f4362ea58fa61b79 diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 72cf1fbf0a..6a370a8669 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -75,7 +75,13 @@ for arch in $ARCHLIST; do continue fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install + if [ "$arch" = x86 ]; then + arch_var=SRCARCH + else + arch_var=ARCH + fi + + make -C "$linux" 
INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" @@ -92,6 +98,11 @@ for arch in $ARCHLIST; do cp_portable "$tmpdir/include/asm/kvm_virtio.h" "$output/include/standard-headers/asm-s390/" cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" fi + if [ $arch = arm ]; then + cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi if [ $arch = x86 ]; then cp_portable "$tmpdir/include/asm/hyperv.h" "$output/include/standard-headers/asm-x86/" cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py index 14a27e7f6a..bcef7ee28e 100755 --- a/scripts/vmstate-static-checker.py +++ b/scripts/vmstate-static-checker.py @@ -85,6 +85,11 @@ def check_fields_match(name, s_field, d_field): 'xio3130-express-upstream-port': ['br.dev', 'parent_obj.parent_obj', 'br.dev.exp.aer_log', 'parent_obj.parent_obj.exp.aer_log'], + 'spapr_pci': ['dma_liobn[0]', 'mig_liobn', + 'mem_win_addr', 'mig_mem_win_addr', + 'mem_win_size', 'mig_mem_win_size', + 'io_win_addr', 'mig_io_win_addr', + 'io_win_size', 'mig_io_win_size'], } if not name in changed_names: diff --git a/slirp/mbuf.c b/slirp/mbuf.c index 7eddc217e4..5ff24559fd 100644 --- a/slirp/mbuf.c +++ b/slirp/mbuf.c @@ -10,7 +10,7 @@ * FreeBSD. They are fixed size, determined by the MTU, * so that one whole packet can fit. Mbuf's cannot be * chained together. If there's more data than the mbuf - * could hold, an external malloced buffer is pointed to + * could hold, an external g_malloced buffer is pointed to * by m_ext (and the data pointers) and M_EXT is set in * the flags */ @@ -41,26 +41,26 @@ void m_cleanup(Slirp *slirp) while ((struct quehead *) m != &slirp->m_usedlist) { next = m->m_next; if (m->m_flags & M_EXT) { - free(m->m_ext); + g_free(m->m_ext); } - free(m); + g_free(m); m = next; } m = (struct mbuf *) slirp->m_freelist.qh_link; while ((struct quehead *) m != &slirp->m_freelist) { next = m->m_next; - free(m); + g_free(m); m = next; } } /* * Get an mbuf from the free list, if there are none - * malloc one + * allocate one * * Because fragmentation can occur if we alloc new mbufs and * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, - * which tells m_free to actually free() it + * which tells m_free to actually g_free() it */ struct mbuf * m_get(Slirp *slirp) @@ -71,8 +71,7 @@ m_get(Slirp *slirp) DEBUG_CALL("m_get"); if (slirp->m_freelist.qh_link == &slirp->m_freelist) { - m = (struct mbuf *)malloc(SLIRP_MSIZE); - if (m == NULL) goto end_error; + m = g_malloc(SLIRP_MSIZE); slirp->mbuf_alloced++; if (slirp->mbuf_alloced > MBUF_THRESH) flags = M_DOFREE; @@ -94,7 +93,6 @@ m_get(Slirp *slirp) m->m_prevpkt = NULL; m->resolution_requested = false; m->expiration_date = (uint64_t)-1; -end_error: DEBUG_ARG("m = %p", m); return m; } @@ -112,15 +110,15 @@ m_free(struct mbuf *m) remque(m); /* If it's M_EXT, free() it */ - if (m->m_flags & M_EXT) - free(m->m_ext); - + if (m->m_flags & M_EXT) { + g_free(m->m_ext); + } /* * Either free() it or put it on the free list */ if (m->m_flags & M_DOFREE) { m->slirp->mbuf_alloced--; - free(m); + g_free(m); } else if ((m->m_flags & M_FREELIST) == 0) { insque(m,&m->slirp->m_freelist); m->m_flags = M_FREELIST; /* Clobber other 
flags */ @@ -130,7 +128,7 @@ m_free(struct mbuf *m) /* * Copy data from one mbuf to the end of - * the other.. if result is too big for one mbuf, malloc() + * the other.. if result is too big for one mbuf, allocate * an M_EXT data segment */ void @@ -160,12 +158,12 @@ m_inc(struct mbuf *m, int size) if (m->m_flags & M_EXT) { datasize = m->m_data - m->m_ext; - m->m_ext = (char *)realloc(m->m_ext,size); + m->m_ext = g_realloc(m->m_ext, size); m->m_data = m->m_ext + datasize; } else { char *dat; datasize = m->m_data - m->m_dat; - dat = (char *)malloc(size); + dat = g_malloc(size); memcpy(dat, m->m_dat, m->m_size); m->m_ext = dat; diff --git a/slirp/socket.c b/slirp/socket.c index 6c18971368..86927722e1 100644 --- a/slirp/socket.c +++ b/slirp/socket.c @@ -713,7 +713,9 @@ tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr, (listen(s,1) < 0)) { int tmperrno = errno; /* Don't clobber the real reason we failed */ - close(s); + if (s >= 0) { + closesocket(s); + } sofree(so); /* Restore the real errno */ #ifdef _WIN32 diff --git a/slirp/udp.c b/slirp/udp.c index 93d7224792..227d779022 100644 --- a/slirp/udp.c +++ b/slirp/udp.c @@ -335,6 +335,10 @@ udp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr, return NULL; } so->s = qemu_socket(AF_INET,SOCK_DGRAM,0); + if (so->s < 0) { + sofree(so); + return NULL; + } so->so_expire = curtime + SO_EXPIRE; insque(so, &slirp->udb); diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index aa6050f406..224f04ba69 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -36,3 +36,4 @@ stub-obj-y += qmp_pc_dimm_device_list.o stub-obj-y += target-monitor-defs.o stub-obj-y += target-get-monitor-def.o stub-obj-y += pc_madt_cpu_entry.o +stub-obj-y += vmgenid.o diff --git a/stubs/vmgenid.c b/stubs/vmgenid.c new file mode 100644 index 0000000000..c64eb7a16e --- /dev/null +++ b/stubs/vmgenid.c @@ -0,0 +1,9 @@ +#include "qemu/osdep.h" +#include "qmp-commands.h" +#include "qapi/qmp/qerror.h" + +GuidInfo *qmp_query_vm_generation_id(Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} diff --git a/stubs/vmstate.c b/stubs/vmstate.c index bbe158fe3b..6d52f29bb2 100644 --- a/stubs/vmstate.c +++ b/stubs/vmstate.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "migration/vmstate.h" +#include "migration/migration.h" const VMStateDescription vmstate_dummy = {}; @@ -19,3 +20,8 @@ void vmstate_unregister(DeviceState *dev, void *opaque) { } + +int check_migratable(Object *obj, Error **err) +{ + return 0; +} diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index b08d1601d1..691ac00c0b 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -28,6 +28,9 @@ #define CPUArchState struct CPUAlphaState +/* Alpha processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) + #include "exec/cpu-defs.h" #include "fpu/softfloat.h" diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index fbb7a15daa..25207cb850 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -14,6 +14,7 @@ #include "internals.h" #include "arm-powerctl.h" #include "qemu/log.h" +#include "qemu/main-loop.h" #include "exec/exec-all.h" #ifndef DEBUG_ARM_POWERCTL @@ -48,11 +49,93 @@ CPUState *arm_get_cpu_by_id(uint64_t id) return NULL; } +struct CpuOnInfo { + uint64_t entry; + uint64_t context_id; + uint32_t target_el; + bool target_aa64; +}; + + +static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, + run_on_cpu_data data) +{ + ARMCPU *target_cpu = ARM_CPU(target_cpu_state); + 
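One note on the slirp conversion above, sketched with a hypothetical helper rather than code from this patch: GLib's g_malloc()/g_realloc() abort on allocation failure instead of returning NULL, which is why the mbuf code can drop its end_error path, and anything allocated that way must be paired with g_free():

#include <string.h>
#include <glib.h>

/* dup_payload() is purely illustrative: no NULL check is needed because
 * g_malloc() never returns NULL (it aborts the process on failure), and the
 * matching release call is g_free(), not free().
 */
static char *dup_payload(const char *src, size_t len)
{
    char *copy = g_malloc(len + 1);

    memcpy(copy, src, len);
    copy[len] = '\0';
    return copy;
}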
struct CpuOnInfo *info = (struct CpuOnInfo *) data.host_ptr; + + /* Initialize the cpu we are turning on */ + cpu_reset(target_cpu_state); + target_cpu_state->halted = 0; + + if (info->target_aa64) { + if ((info->target_el < 3) && arm_feature(&target_cpu->env, + ARM_FEATURE_EL3)) { + /* + * As target mode is AArch64, we need to set lower + * exception level (the requested level 2) to AArch64 + */ + target_cpu->env.cp15.scr_el3 |= SCR_RW; + } + + if ((info->target_el < 2) && arm_feature(&target_cpu->env, + ARM_FEATURE_EL2)) { + /* + * As target mode is AArch64, we need to set lower + * exception level (the requested level 1) to AArch64 + */ + target_cpu->env.cp15.hcr_el2 |= HCR_RW; + } + + target_cpu->env.pstate = aarch64_pstate_mode(info->target_el, true); + } else { + /* We are requested to boot in AArch32 mode */ + static const uint32_t mode_for_el[] = { 0, + ARM_CPU_MODE_SVC, + ARM_CPU_MODE_HYP, + ARM_CPU_MODE_SVC }; + + cpsr_write(&target_cpu->env, mode_for_el[info->target_el], CPSR_M, + CPSRWriteRaw); + } + + if (info->target_el == 3) { + /* Processor is in secure mode */ + target_cpu->env.cp15.scr_el3 &= ~SCR_NS; + } else { + /* Processor is not in secure mode */ + target_cpu->env.cp15.scr_el3 |= SCR_NS; + } + + /* We check if the started CPU is now at the correct level */ + assert(info->target_el == arm_current_el(&target_cpu->env)); + + if (info->target_aa64) { + target_cpu->env.xregs[0] = info->context_id; + target_cpu->env.thumb = false; + } else { + target_cpu->env.regs[0] = info->context_id; + target_cpu->env.thumb = info->entry & 1; + info->entry &= 0xfffffffe; + } + + /* Start the new CPU at the requested address */ + cpu_set_pc(target_cpu_state, info->entry); + + g_free(info); + + /* Finally set the power status */ + assert(qemu_mutex_iothread_locked()); + target_cpu->power_state = PSCI_ON; +} + int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, uint32_t target_el, bool target_aa64) { CPUState *target_cpu_state; ARMCPU *target_cpu; + struct CpuOnInfo *info; + + assert(qemu_mutex_iothread_locked()); DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 "\n", cpuid, target_el, target_aa64 ? 
"aarch64" : "aarch32", entry, @@ -77,7 +160,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, } target_cpu = ARM_CPU(target_cpu_state); - if (!target_cpu->powered_off) { + if (target_cpu->power_state == PSCI_ON) { qemu_log_mask(LOG_GUEST_ERROR, "[ARM]%s: CPU %" PRId64 " is already on\n", __func__, cpuid); @@ -109,74 +192,54 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, return QEMU_ARM_POWERCTL_INVALID_PARAM; } - /* Initialize the cpu we are turning on */ - cpu_reset(target_cpu_state); - target_cpu->powered_off = false; - target_cpu_state->halted = 0; - - if (target_aa64) { - if ((target_el < 3) && arm_feature(&target_cpu->env, ARM_FEATURE_EL3)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 2) to AArch64 - */ - target_cpu->env.cp15.scr_el3 |= SCR_RW; - } - - if ((target_el < 2) && arm_feature(&target_cpu->env, ARM_FEATURE_EL2)) { - /* - * As target mode is AArch64, we need to set lower - * exception level (the requested level 1) to AArch64 - */ - target_cpu->env.cp15.hcr_el2 |= HCR_RW; - } - - target_cpu->env.pstate = aarch64_pstate_mode(target_el, true); - } else { - /* We are requested to boot in AArch32 mode */ - static uint32_t mode_for_el[] = { 0, - ARM_CPU_MODE_SVC, - ARM_CPU_MODE_HYP, - ARM_CPU_MODE_SVC }; - - cpsr_write(&target_cpu->env, mode_for_el[target_el], CPSR_M, - CPSRWriteRaw); - } - - if (target_el == 3) { - /* Processor is in secure mode */ - target_cpu->env.cp15.scr_el3 &= ~SCR_NS; - } else { - /* Processor is not in secure mode */ - target_cpu->env.cp15.scr_el3 |= SCR_NS; - } - - /* We check if the started CPU is now at the correct level */ - assert(target_el == arm_current_el(&target_cpu->env)); - - if (target_aa64) { - target_cpu->env.xregs[0] = context_id; - target_cpu->env.thumb = false; - } else { - target_cpu->env.regs[0] = context_id; - target_cpu->env.thumb = entry & 1; - entry &= 0xfffffffe; + /* + * If another CPU has powered the target on we are in the state + * ON_PENDING and additional attempts to power on the CPU should + * fail (see 6.6 Implementation CPU_ON/CPU_OFF races in the PSCI + * spec) + */ + if (target_cpu->power_state == PSCI_ON_PENDING) { + qemu_log_mask(LOG_GUEST_ERROR, + "[ARM]%s: CPU %" PRId64 " is already powering on\n", + __func__, cpuid); + return QEMU_ARM_POWERCTL_ON_PENDING; } - /* Start the new CPU at the requested address */ - cpu_set_pc(target_cpu_state, entry); + /* To avoid racing with a CPU we are just kicking off we do the + * final bit of preparation for the work in the target CPUs + * context. 
+ */ + info = g_new(struct CpuOnInfo, 1); + info->entry = entry; + info->context_id = context_id; + info->target_el = target_el; + info->target_aa64 = target_aa64; - qemu_cpu_kick(target_cpu_state); + async_run_on_cpu(target_cpu_state, arm_set_cpu_on_async_work, + RUN_ON_CPU_HOST_PTR(info)); /* We are good to go */ return QEMU_ARM_POWERCTL_RET_SUCCESS; } +static void arm_set_cpu_off_async_work(CPUState *target_cpu_state, + run_on_cpu_data data) +{ + ARMCPU *target_cpu = ARM_CPU(target_cpu_state); + + assert(qemu_mutex_iothread_locked()); + target_cpu->power_state = PSCI_OFF; + target_cpu_state->halted = 1; + target_cpu_state->exception_index = EXCP_HLT; +} + int arm_set_cpu_off(uint64_t cpuid) { CPUState *target_cpu_state; ARMCPU *target_cpu; + assert(qemu_mutex_iothread_locked()); + DPRINTF("cpu %" PRId64 "\n", cpuid); /* change to the cpu we are powering up */ @@ -185,27 +248,34 @@ int arm_set_cpu_off(uint64_t cpuid) return QEMU_ARM_POWERCTL_INVALID_PARAM; } target_cpu = ARM_CPU(target_cpu_state); - if (target_cpu->powered_off) { + if (target_cpu->power_state == PSCI_OFF) { qemu_log_mask(LOG_GUEST_ERROR, "[ARM]%s: CPU %" PRId64 " is already off\n", __func__, cpuid); return QEMU_ARM_POWERCTL_IS_OFF; } - target_cpu->powered_off = true; - target_cpu_state->halted = 1; - target_cpu_state->exception_index = EXCP_HLT; - cpu_loop_exit(target_cpu_state); - /* notreached */ + /* Queue work to run under the target vCPUs context */ + async_run_on_cpu(target_cpu_state, arm_set_cpu_off_async_work, + RUN_ON_CPU_NULL); return QEMU_ARM_POWERCTL_RET_SUCCESS; } +static void arm_reset_cpu_async_work(CPUState *target_cpu_state, + run_on_cpu_data data) +{ + /* Reset the cpu */ + cpu_reset(target_cpu_state); +} + int arm_reset_cpu(uint64_t cpuid) { CPUState *target_cpu_state; ARMCPU *target_cpu; + assert(qemu_mutex_iothread_locked()); + DPRINTF("cpu %" PRId64 "\n", cpuid); /* change to the cpu we are resetting */ @@ -214,15 +284,17 @@ int arm_reset_cpu(uint64_t cpuid) return QEMU_ARM_POWERCTL_INVALID_PARAM; } target_cpu = ARM_CPU(target_cpu_state); - if (target_cpu->powered_off) { + + if (target_cpu->power_state == PSCI_OFF) { qemu_log_mask(LOG_GUEST_ERROR, "[ARM]%s: CPU %" PRId64 " is off\n", __func__, cpuid); return QEMU_ARM_POWERCTL_IS_OFF; } - /* Reset the cpu */ - cpu_reset(target_cpu_state); + /* Queue work to run under the target vCPUs context */ + async_run_on_cpu(target_cpu_state, arm_reset_cpu_async_work, + RUN_ON_CPU_NULL); return QEMU_ARM_POWERCTL_RET_SUCCESS; } diff --git a/target/arm/arm-powerctl.h b/target/arm/arm-powerctl.h index 98ee04989b..04353923c0 100644 --- a/target/arm/arm-powerctl.h +++ b/target/arm/arm-powerctl.h @@ -17,6 +17,7 @@ #define QEMU_ARM_POWERCTL_INVALID_PARAM QEMU_PSCI_RET_INVALID_PARAMS #define QEMU_ARM_POWERCTL_ALREADY_ON QEMU_PSCI_RET_ALREADY_ON #define QEMU_ARM_POWERCTL_IS_OFF QEMU_PSCI_RET_DENIED +#define QEMU_ARM_POWERCTL_ON_PENDING QEMU_PSCI_RET_ON_PENDING /* * arm_get_cpu_by_id: @@ -43,6 +44,7 @@ CPUState *arm_get_cpu_by_id(uint64_t cpuid); * Returns: QEMU_ARM_POWERCTL_RET_SUCCESS on success. * QEMU_ARM_POWERCTL_INVALID_PARAM if bad parameters are provided. * QEMU_ARM_POWERCTL_ALREADY_ON if the CPU was already started. 
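The reworked arm_set_cpu_on()/arm_set_cpu_off() above defer the actual state change to the target vCPU. Distilled into a hypothetical pair of functions (assuming only the async_run_on_cpu(), RUN_ON_CPU_HOST_PTR() and run_on_cpu_data interfaces already used in this hunk), the pattern is: heap-allocate the parameters, queue the work, and let the worker free them once it runs in the target vCPU's thread with the BQL held:

#include "qemu/osdep.h"
#include "qom/cpu.h"

/* Hypothetical distillation of the deferred power-on work; the real patch
 * carries more fields (entry, context_id, target EL, AArch64 flag). The
 * caller must not touch 'info' again after queuing, because the worker runs
 * asynchronously in the target vCPU's context.
 */
struct WorkInfo {
    uint64_t entry;
};

static void do_power_on_work(CPUState *cs, run_on_cpu_data data)
{
    struct WorkInfo *info = data.host_ptr;

    /* Executed in the target vCPU's thread, with the BQL held. */
    cpu_set_pc(cs, info->entry);
    g_free(info);
}

static void queue_power_on(CPUState *target, uint64_t entry)
{
    struct WorkInfo *info = g_new(struct WorkInfo, 1);

    info->entry = entry;
    async_run_on_cpu(target, do_power_on_work, RUN_ON_CPU_HOST_PTR(info));
}

The caller returns as soon as the work is queued, which is exactly why the new PSCI_ON_PENDING state is needed to reject a second CPU_ON request for the same target while the first one is still in flight.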
+ * QEMU_ARM_POWERCTL_ON_PENDING if the CPU is still powering up */ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, uint32_t target_el, bool target_aa64); diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 4a069f6985..04b062cb7e 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -45,7 +45,7 @@ static bool arm_cpu_has_work(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); - return !cpu->powered_off + return (cpu->power_state != PSCI_OFF) && cs->interrupt_request & (CPU_INTERRUPT_FIQ | CPU_INTERRUPT_HARD | CPU_INTERRUPT_VFIQ | CPU_INTERRUPT_VIRQ @@ -132,7 +132,7 @@ static void arm_cpu_reset(CPUState *s) env->vfp.xregs[ARM_VFP_MVFR1] = cpu->mvfr1; env->vfp.xregs[ARM_VFP_MVFR2] = cpu->mvfr2; - cpu->powered_off = cpu->start_powered_off; + cpu->power_state = cpu->start_powered_off ? PSCI_OFF : PSCI_ON; s->halted = cpu->start_powered_off; if (arm_feature(env, ARM_FEATURE_IWMMXT)) { @@ -338,13 +338,6 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) CPUARMState *env = &cpu->env; bool ret = false; - - if (interrupt_request & CPU_INTERRUPT_FIQ - && !(env->daif & PSTATE_F)) { - cs->exception_index = EXCP_FIQ; - cc->do_interrupt(cs); - ret = true; - } /* ARMv7-M interrupt return works by loading a magic value * into the PC. On real hardware the load causes the * return to occur. The qemu implementation performs the @@ -354,9 +347,16 @@ static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) * the stack if an interrupt occurred at the wrong time. * We avoid this by disabling interrupts when * pc contains a magic address. + * + * ARMv7-M interrupt masking works differently than -A or -R. + * There is no FIQ/IRQ distinction. Instead of I and F bits + * masking FIQ and IRQ interrupts, an exception is taken only + * if it is higher priority than the current execution priority + * (which depends on state like BASEPRI, FAULTMASK and the + * currently active exception). */ if (interrupt_request & CPU_INTERRUPT_HARD - && !(env->daif & PSTATE_I) + && (armv7m_nvic_can_take_pending_exception(env->nvic)) && (env->regs[15] < 0xfffffff0)) { cs->exception_index = EXCP_IRQ; cc->do_interrupt(cs); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 0956a54e89..25ceaabb5d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -30,6 +30,9 @@ # define TARGET_LONG_BITS 32 #endif +/* ARM processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) + #define CPUArchState struct CPUARMState #include "qemu-common.h" @@ -54,6 +57,7 @@ #define EXCP_VFIQ 15 #define EXCP_SEMIHOST 16 /* semihosting call */ #define EXCP_NOCP 17 /* v7M NOCP UsageFault */ +#define EXCP_INVSTATE 18 /* v7M INVSTATE UsageFault */ #define ARMV7M_EXCP_RESET 1 #define ARMV7M_EXCP_NMI 2 @@ -517,6 +521,8 @@ typedef struct CPUARMState { void *nvic; const struct arm_boot_info *boot_info; + /* Store GICv3CPUState to access from this struct */ + void *gicv3state; } CPUARMState; /** @@ -526,6 +532,15 @@ typedef struct CPUARMState { */ typedef void ARMELChangeHook(ARMCPU *cpu, void *opaque); + +/* These values map onto the return values for + * QEMU_PSCI_0_2_FN_AFFINITY_INFO */ +typedef enum ARMPSCIState { + PSCI_OFF = 0, + PSCI_ON = 1, + PSCI_ON_PENDING = 2 +} ARMPSCIState; + /** * ARMCPU: * @env: #CPUARMState @@ -582,8 +597,10 @@ struct ARMCPU { /* Should CPU start in PSCI powered-off state? 
*/ bool start_powered_off; - /* CPU currently in PSCI powered-off state */ - bool powered_off; + + /* Current power state, access guarded by BQL */ + ARMPSCIState power_state; + /* CPU has virtualization extension */ bool has_el2; /* CPU has security extension */ @@ -1342,9 +1359,27 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx, uint32_t cur_el, bool secure); /* Interface between CPU and Interrupt controller. */ +#ifndef CONFIG_USER_ONLY +bool armv7m_nvic_can_take_pending_exception(void *opaque); +#else +static inline bool armv7m_nvic_can_take_pending_exception(void *opaque) +{ + return true; +} +#endif void armv7m_nvic_set_pending(void *opaque, int irq); -int armv7m_nvic_acknowledge_irq(void *opaque); -void armv7m_nvic_complete_irq(void *opaque, int irq); +void armv7m_nvic_acknowledge_irq(void *opaque); +/** + * armv7m_nvic_complete_irq: complete specified interrupt or exception + * @opaque: the NVIC + * @irq: the exception number to complete + * + * Returns: -1 if the irq was not active + * 1 if completing this irq brought us back to base (no active irqs) + * 0 if there is still an irq active after this one was completed + * (Ignoring -1, this is the same as the RETTOBASE value before completion.) + */ +int armv7m_nvic_complete_irq(void *opaque, int irq); /* Interface for defining coprocessor registers. * Registers are defined in tables of arm_cp_reginfo structs diff --git a/target/arm/helper.c b/target/arm/helper.c index 47250bcf16..3f4211b572 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -536,41 +536,33 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri, static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush(other_cs); - } + tlb_flush_all_cpus_synced(cs); } static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush(other_cs); - } + tlb_flush_all_cpus_synced(cs); } static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_page(other_cs, value & TARGET_PAGE_MASK); - } + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); } static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_page(other_cs, value & TARGET_PAGE_MASK); - } + tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK); } static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -578,19 +570,21 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = ENV_GET_CPU(env); - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, - ARMMMUIdx_S2NS, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0) | + (1 << ARMMMUIdx_S2NS)); } static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1); - } + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0) | + (1 << ARMMMUIdx_S2NS)); } static void tlbiipas2_write(CPUARMState *env, 
const ARMCPRegInfo *ri, @@ -611,13 +605,13 @@ static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri, pageaddr = sextract64(value << 12, 0, 40); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS)); } static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr; if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { @@ -626,9 +620,8 @@ static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri, pageaddr = sextract64(value << 12, 0, 40); - CPU_FOREACH(other_cs) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1); - } + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S2NS)); } static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -636,17 +629,15 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, { CPUState *cs = ENV_GET_CPU(env); - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1); + tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2)); } static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1); - } + tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2)); } static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -655,18 +646,17 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2)); } static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12); - CPU_FOREACH(other_cs) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1); - } + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S1E2)); } static const ARMCPRegInfo cp_reginfo[] = { @@ -2542,8 +2532,10 @@ static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, /* Accesses to VTTBR may change the VMID so we must flush the TLB. 
*/ if (raw_read(env, ri) != value) { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, - ARMMMUIdx_S2NS, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0) | + (1 << ARMMMUIdx_S2NS)); raw_write(env, ri, value); } } @@ -2898,29 +2890,33 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env, static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - ARMCPU *cpu = arm_env_get_cpu(env); - CPUState *cs = CPU(cpu); + CPUState *cs = ENV_GET_CPU(env); if (arm_is_secure_below_el3(env)) { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); } else { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + CPUState *cs = ENV_GET_CPU(env); bool sec = arm_is_secure_below_el3(env); - CPUState *other_cs; - CPU_FOREACH(other_cs) { - if (sec) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1); - } else { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, -1); - } + if (sec) { + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); + } else { + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } @@ -2935,13 +2931,19 @@ static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); if (arm_is_secure_below_el3(env)) { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); } else { if (arm_feature(env, ARM_FEATURE_EL2)) { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, - ARMMMUIdx_S2NS, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0) | + (1 << ARMMMUIdx_S2NS)); } else { - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0, -1); + tlb_flush_by_mmuidx(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } } @@ -2952,7 +2954,7 @@ static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = arm_env_get_cpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E2, -1); + tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E2)); } static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2961,7 +2963,7 @@ static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = arm_env_get_cpu(env); CPUState *cs = CPU(cpu); - tlb_flush_by_mmuidx(cs, ARMMMUIdx_S1E3, -1); + tlb_flush_by_mmuidx(cs, (1 << ARMMMUIdx_S1E3)); } static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2971,41 +2973,40 @@ static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri, * stage 2 translations, whereas most other scopes only invalidate * stage 1 translations. 
*/ + CPUState *cs = ENV_GET_CPU(env); bool sec = arm_is_secure_below_el3(env); bool has_el2 = arm_feature(env, ARM_FEATURE_EL2); - CPUState *other_cs; - - CPU_FOREACH(other_cs) { - if (sec) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1SE1, ARMMMUIdx_S1SE0, -1); - } else if (has_el2) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, ARMMMUIdx_S2NS, -1); - } else { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, -1); - } + + if (sec) { + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); + } else if (has_el2) { + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0) | + (1 << ARMMMUIdx_S2NS)); + } else { + tlb_flush_by_mmuidx_all_cpus_synced(cs, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E2, -1); - } + tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E2)); } static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); - CPU_FOREACH(other_cs) { - tlb_flush_by_mmuidx(other_cs, ARMMMUIdx_S1E3, -1); - } + tlb_flush_by_mmuidx_all_cpus_synced(cs, (1 << ARMMMUIdx_S1E3)); } static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3021,11 +3022,13 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t pageaddr = sextract64(value << 12, 0, 56); if (arm_is_secure_below_el3(env)) { - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1SE1, - ARMMMUIdx_S1SE0, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); } else { - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } @@ -3040,7 +3043,7 @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E2, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E2)); } static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3054,47 +3057,46 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri, CPUState *cs = CPU(cpu); uint64_t pageaddr = sextract64(value << 12, 0, 56); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S1E3, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S1E3)); } static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { + ARMCPU *cpu = arm_env_get_cpu(env); + CPUState *cs = CPU(cpu); bool sec = arm_is_secure_below_el3(env); - CPUState *other_cs; uint64_t pageaddr = sextract64(value << 12, 0, 56); - CPU_FOREACH(other_cs) { - if (sec) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1SE1, - ARMMMUIdx_S1SE0, -1); - } else { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S12NSE1, - ARMMMUIdx_S12NSE0, -1); - } + if (sec) { + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S1SE1) | + (1 << ARMMMUIdx_S1SE0)); + } else { + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S12NSE1) | + (1 << ARMMMUIdx_S12NSE0)); } } static void 
tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - CPU_FOREACH(other_cs) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E2, -1); - } + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S1E2)); } static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr = sextract64(value << 12, 0, 56); - CPU_FOREACH(other_cs) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S1E3, -1); - } + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S1E3)); } static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -3116,13 +3118,13 @@ static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri, pageaddr = sextract64(value << 12, 0, 48); - tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdx_S2NS, -1); + tlb_flush_page_by_mmuidx(cs, pageaddr, (1 << ARMMMUIdx_S2NS)); } static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) { - CPUState *other_cs; + CPUState *cs = ENV_GET_CPU(env); uint64_t pageaddr; if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) { @@ -3131,9 +3133,8 @@ static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri, pageaddr = sextract64(value << 12, 0, 48); - CPU_FOREACH(other_cs) { - tlb_flush_page_by_mmuidx(other_cs, pageaddr, ARMMMUIdx_S2NS, -1); - } + tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr, + (1 << ARMMMUIdx_S2NS)); } static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -6001,22 +6002,165 @@ static void switch_v7m_sp(CPUARMState *env, bool new_spsel) } } -static void do_v7m_exception_exit(CPUARMState *env) +static uint32_t arm_v7m_load_vector(ARMCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUARMState *env = &cpu->env; + MemTxResult result; + hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4; + uint32_t addr; + + addr = address_space_ldl(cs->as, vec, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + /* Architecturally this should cause a HardFault setting HSFR.VECTTBL, + * which would then be immediately followed by our failing to load + * the entry vector for that HardFault, which is a Lockup case. + * Since we don't model Lockup, we just report this guest error + * via cpu_abort(). + */ + cpu_abort(cs, "Failed to read from exception vector table " + "entry %08x\n", (unsigned)vec); + } + return addr; +} + +static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr) +{ + /* Do the "take the exception" parts of exception entry, + * but not the pushing of state to the stack. This is + * similar to the pseudocode ExceptionTaken() function. + */ + CPUARMState *env = &cpu->env; + uint32_t addr; + + armv7m_nvic_acknowledge_irq(env->nvic); + switch_v7m_sp(env, 0); + /* Clear IT bits */ + env->condexec_bits = 0; + env->regs[14] = lr; + addr = arm_v7m_load_vector(cpu); + env->regs[15] = addr & 0xfffffffe; + env->thumb = addr & 1; +} + +static void v7m_push_stack(ARMCPU *cpu) +{ + /* Do the "set up stack frame" part of exception entry, + * similar to pseudocode PushStack(). 
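For orientation while reading v7m_push_stack() introduced here and the matching pops in do_v7m_exception_exit(): the eight v7m_push() calls build the standard ARMv7-M exception stack frame, with r0 ending up at the lowest address. A purely illustrative layout follows; this struct is not something the patch defines:

#include <stdint.h>

/* ARMv7-M hardware-style exception stack frame as pushed by v7m_push_stack();
 * offsets are relative to SP after exception entry.
 */
struct v7m_exception_frame {
    uint32_t r0;    /* SP + 0x00 */
    uint32_t r1;    /* SP + 0x04 */
    uint32_t r2;    /* SP + 0x08 */
    uint32_t r3;    /* SP + 0x0c */
    uint32_t r12;   /* SP + 0x10 */
    uint32_t lr;    /* SP + 0x14  interrupted context's r14 */
    uint32_t pc;    /* SP + 0x18  return address into the interrupted code */
    uint32_t xpsr;  /* SP + 0x1c  bit 9 records the 4-byte aligner, undone on exit */
};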
+ */ + CPUARMState *env = &cpu->env; + uint32_t xpsr = xpsr_read(env); + + /* Align stack pointer if the guest wants that */ + if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) { + env->regs[13] -= 4; + xpsr |= 0x200; + } + /* Switch to the handler mode. */ + v7m_push(env, xpsr); + v7m_push(env, env->regs[15]); + v7m_push(env, env->regs[14]); + v7m_push(env, env->regs[12]); + v7m_push(env, env->regs[3]); + v7m_push(env, env->regs[2]); + v7m_push(env, env->regs[1]); + v7m_push(env, env->regs[0]); +} + +static void do_v7m_exception_exit(ARMCPU *cpu) { + CPUARMState *env = &cpu->env; uint32_t type; uint32_t xpsr; - + bool ufault = false; + bool return_to_sp_process = false; + bool return_to_handler = false; + bool rettobase = false; + + /* We can only get here from an EXCP_EXCEPTION_EXIT, and + * arm_v7m_do_unassigned_access() enforces the architectural rule + * that jumps to magic addresses don't have magic behaviour unless + * we're in Handler mode (compare pseudocode BXWritePC()). + */ + assert(env->v7m.exception != 0); + + /* In the spec pseudocode ExceptionReturn() is called directly + * from BXWritePC() and gets the full target PC value including + * bit zero. In QEMU's implementation we treat it as a normal + * jump-to-register (which is then caught later on), and so split + * the target value up between env->regs[15] and env->thumb in + * gen_bx(). Reconstitute it. + */ type = env->regs[15]; + if (env->thumb) { + type |= 1; + } + + qemu_log_mask(CPU_LOG_INT, "Exception return: magic PC %" PRIx32 + " previous exception %d\n", + type, env->v7m.exception); + + if (extract32(type, 5, 23) != extract32(-1, 5, 23)) { + qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero high bits in exception " + "exit PC value 0x%" PRIx32 " are UNPREDICTABLE\n", type); + } + if (env->v7m.exception != ARMV7M_EXCP_NMI) { /* Auto-clear FAULTMASK on return from other than NMI */ env->daif &= ~PSTATE_F; } - if (env->v7m.exception != 0) { - armv7m_nvic_complete_irq(env->nvic, env->v7m.exception); + + switch (armv7m_nvic_complete_irq(env->nvic, env->v7m.exception)) { + case -1: + /* attempt to exit an exception that isn't active */ + ufault = true; + break; + case 0: + /* still an irq active now */ + break; + case 1: + /* we returned to base exception level, no nesting. + * (In the pseudocode this is written using "NestedActivation != 1" + * where we have 'rettobase == false'.) + */ + rettobase = true; + break; + default: + g_assert_not_reached(); + } + + switch (type & 0xf) { + case 1: /* Return to Handler */ + return_to_handler = true; + break; + case 13: /* Return to Thread using Process stack */ + return_to_sp_process = true; + /* fall through */ + case 9: /* Return to Thread using Main stack */ + if (!rettobase && + !(env->v7m.ccr & R_V7M_CCR_NONBASETHRDENA_MASK)) { + ufault = true; + } + break; + default: + ufault = true; + } + + if (ufault) { + /* Bad exception return: instead of popping the exception + * stack, directly take a usage fault on the current stack. + */ + env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK; + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + v7m_exception_taken(cpu, type | 0xf0000000); + qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing " + "stackframe: failed exception return integrity check\n"); + return; } /* Switch to the target stack. */ - switch_v7m_sp(env, (type & 4) != 0); + switch_v7m_sp(env, return_to_sp_process); /* Pop registers. 
*/ env->regs[0] = v7m_pop(env); env->regs[1] = v7m_pop(env); @@ -6040,11 +6184,24 @@ static void do_v7m_exception_exit(CPUARMState *env) /* Undo stack alignment. */ if (xpsr & 0x200) env->regs[13] |= 4; - /* ??? The exception return type specifies Thread/Handler mode. However - this is also implied by the xPSR value. Not sure what to do - if there is a mismatch. */ - /* ??? Likewise for mismatches between the CONTROL register and the stack - pointer. */ + + /* The restored xPSR exception field will be zero if we're + * resuming in Thread mode. If that doesn't match what the + * exception return type specified then this is a UsageFault. + */ + if (return_to_handler == (env->v7m.exception == 0)) { + /* Take an INVPC UsageFault by pushing the stack again. */ + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + env->v7m.cfsr |= R_V7M_CFSR_INVPC_MASK; + v7m_push_stack(cpu); + v7m_exception_taken(cpu, type | 0xf0000000); + qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: " + "failed exception return integrity check\n"); + return; + } + + /* Otherwise, we have a successful exception exit. */ + qemu_log_mask(CPU_LOG_INT, "...successful exception return\n"); } static void arm_log_exception(int idx) @@ -6062,37 +6219,11 @@ static void arm_log_exception(int idx) } } -static uint32_t arm_v7m_load_vector(ARMCPU *cpu) - -{ - CPUState *cs = CPU(cpu); - CPUARMState *env = &cpu->env; - MemTxResult result; - hwaddr vec = env->v7m.vecbase + env->v7m.exception * 4; - uint32_t addr; - - addr = address_space_ldl(cs->as, vec, - MEMTXATTRS_UNSPECIFIED, &result); - if (result != MEMTX_OK) { - /* Architecturally this should cause a HardFault setting HSFR.VECTTBL, - * which would then be immediately followed by our failing to load - * the entry vector for that HardFault, which is a Lockup case. - * Since we don't model Lockup, we just report this guest error - * via cpu_abort(). - */ - cpu_abort(cs, "Failed to read from exception vector table " - "entry %08x\n", (unsigned)vec); - } - return addr; -} - void arm_v7m_cpu_do_interrupt(CPUState *cs) { ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = &cpu->env; - uint32_t xpsr = xpsr_read(env); uint32_t lr; - uint32_t addr; arm_log_exception(cs->exception_index); @@ -6105,28 +6236,30 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) /* For exceptions we just mark as pending on the NVIC, and let that handle it. */ - /* TODO: Need to escalate if the current priority is higher than the - one we're raising. */ switch (cs->exception_index) { case EXCP_UDEF: armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); env->v7m.cfsr |= R_V7M_CFSR_UNDEFINSTR_MASK; - return; + break; case EXCP_NOCP: armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); env->v7m.cfsr |= R_V7M_CFSR_NOCP_MASK; - return; + break; + case EXCP_INVSTATE: + armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE); + env->v7m.cfsr |= R_V7M_CFSR_INVSTATE_MASK; + break; case EXCP_SWI: /* The PC already points to the next instruction. */ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC); - return; + break; case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: /* TODO: if we implemented the MPU registers, this is where we * should set the MMFAR, etc from exception.fsr and exception.vaddress. 
*/ armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM); - return; + break; case EXCP_BKPT: if (semihosting_enabled()) { int nr; @@ -6141,39 +6274,20 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs) } } armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_DEBUG); - return; + break; case EXCP_IRQ: - env->v7m.exception = armv7m_nvic_acknowledge_irq(env->nvic); break; case EXCP_EXCEPTION_EXIT: - do_v7m_exception_exit(env); + do_v7m_exception_exit(cpu); return; default: cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index); return; /* Never happens. Keep compiler happy. */ } - /* Align stack pointer if the guest wants that */ - if ((env->regs[13] & 4) && (env->v7m.ccr & R_V7M_CCR_STKALIGN_MASK)) { - env->regs[13] -= 4; - xpsr |= 0x200; - } - /* Switch to the handler mode. */ - v7m_push(env, xpsr); - v7m_push(env, env->regs[15]); - v7m_push(env, env->regs[14]); - v7m_push(env, env->regs[12]); - v7m_push(env, env->regs[3]); - v7m_push(env, env->regs[2]); - v7m_push(env, env->regs[1]); - v7m_push(env, env->regs[0]); - switch_v7m_sp(env, 0); - /* Clear IT bits */ - env->condexec_bits = 0; - env->regs[14] = lr; - addr = arm_v7m_load_vector(cpu); - env->regs[15] = addr & 0xfffffffe; - env->thumb = addr & 1; + v7m_push_stack(cpu); + v7m_exception_taken(cpu, lr); + qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception); } /* Function used to synchronize QEMU's AArch64 register set with AArch32 @@ -6769,6 +6883,12 @@ void arm_cpu_do_interrupt(CPUState *cs) arm_cpu_do_interrupt_aarch32(cs); } + /* Hooks may change global state so BQL should be held, also the + * BQL needs to be held for any modification of + * cs->interrupt_request. + */ + g_assert(qemu_mutex_iothread_locked()); + arm_call_el_change_hook(cpu); if (!kvm_enabled()) { diff --git a/target/arm/kvm.c b/target/arm/kvm.c index c00b94e42a..395e986973 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -488,8 +488,8 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) { if (cap_has_mp_state) { struct kvm_mp_state mp_state = { - .mp_state = - cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE + .mp_state = (cpu->power_state == PSCI_OFF) ? + KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE }; int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); if (ret) { @@ -515,7 +515,8 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) __func__, ret, strerror(-ret)); abort(); } - cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED); + cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ? + PSCI_OFF : PSCI_ON; } return 0; diff --git a/target/arm/machine.c b/target/arm/machine.c index fa5ec76090..d8094a840b 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -211,6 +211,38 @@ static const VMStateInfo vmstate_cpsr = { .put = put_cpsr, }; +static int get_power(QEMUFile *f, void *opaque, size_t size, + VMStateField *field) +{ + ARMCPU *cpu = opaque; + bool powered_off = qemu_get_byte(f); + cpu->power_state = powered_off ? PSCI_OFF : PSCI_ON; + return 0; +} + +static int put_power(QEMUFile *f, void *opaque, size_t size, + VMStateField *field, QJSON *vmdesc) +{ + ARMCPU *cpu = opaque; + + /* Migration should never happen while we transition power states */ + + if (cpu->power_state == PSCI_ON || + cpu->power_state == PSCI_OFF) { + bool powered_off = (cpu->power_state == PSCI_OFF) ? 
true : false; + qemu_put_byte(f, powered_off); + return 0; + } else { + return 1; + } +} + +static const VMStateInfo vmstate_powered_off = { + .name = "powered_off", + .get = get_power, + .put = put_power, +}; + static void cpu_pre_save(void *opaque) { ARMCPU *cpu = opaque; @@ -329,7 +361,14 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT64(env.exception.vaddress, ARMCPU), VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), - VMSTATE_BOOL(powered_off, ARMCPU), + { + .name = "power_state", + .version_id = 0, + .size = sizeof(bool), + .info = &vmstate_powered_off, + .flags = VMS_SINGLE, + .offset = 0, + }, VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription*[]) { diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index fb366fdc35..d64c8670fa 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" #include "internals.h" @@ -435,6 +436,13 @@ void HELPER(yield)(CPUARMState *env) ARMCPU *cpu = arm_env_get_cpu(env); CPUState *cs = CPU(cpu); + /* When running in MTTCG we don't generate jumps to the yield and + * WFE helpers as it won't affect the scheduling of other vCPUs. + * If we wanted to more completely model WFE/SEV so we don't busy + * spin unnecessarily we would need to do something more involved. + */ + g_assert(!parallel_cpus); + /* This is a non-trappable hint instruction that generally indicates * that the guest is currently busy-looping. Yield control back to the * top level loop so that a more deserving VCPU has a chance to run. @@ -487,7 +495,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) */ env->regs[15] &= (env->thumb ? ~1 : ~3); + qemu_mutex_lock_iothread(); arm_call_el_change_hook(arm_env_get_cpu(env)); + qemu_mutex_unlock_iothread(); } /* Access to user mode registers from privileged modes. 
*/ @@ -735,28 +745,58 @@ void HELPER(set_cp_reg)(CPUARMState *env, void *rip, uint32_t value) { const ARMCPRegInfo *ri = rip; - ri->writefn(env, ri, value); + if (ri->type & ARM_CP_IO) { + qemu_mutex_lock_iothread(); + ri->writefn(env, ri, value); + qemu_mutex_unlock_iothread(); + } else { + ri->writefn(env, ri, value); + } } uint32_t HELPER(get_cp_reg)(CPUARMState *env, void *rip) { const ARMCPRegInfo *ri = rip; + uint32_t res; + + if (ri->type & ARM_CP_IO) { + qemu_mutex_lock_iothread(); + res = ri->readfn(env, ri); + qemu_mutex_unlock_iothread(); + } else { + res = ri->readfn(env, ri); + } - return ri->readfn(env, ri); + return res; } void HELPER(set_cp_reg64)(CPUARMState *env, void *rip, uint64_t value) { const ARMCPRegInfo *ri = rip; - ri->writefn(env, ri, value); + if (ri->type & ARM_CP_IO) { + qemu_mutex_lock_iothread(); + ri->writefn(env, ri, value); + qemu_mutex_unlock_iothread(); + } else { + ri->writefn(env, ri, value); + } } uint64_t HELPER(get_cp_reg64)(CPUARMState *env, void *rip) { const ARMCPRegInfo *ri = rip; + uint64_t res; - return ri->readfn(env, ri); + if (ri->type & ARM_CP_IO) { + qemu_mutex_lock_iothread(); + res = ri->readfn(env, ri); + qemu_mutex_unlock_iothread(); + } else { + res = ri->readfn(env, ri); + } + + return res; } void HELPER(msr_i_pstate)(CPUARMState *env, uint32_t op, uint32_t imm) @@ -989,7 +1029,9 @@ void HELPER(exception_return)(CPUARMState *env) cur_el, new_el, env->pc); } + qemu_mutex_lock_iothread(); arm_call_el_change_hook(arm_env_get_cpu(env)); + qemu_mutex_unlock_iothread(); return; diff --git a/target/arm/psci.c b/target/arm/psci.c index 64bf82eea1..ade9fe2ede 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -127,7 +127,9 @@ void arm_handle_psci_call(ARMCPU *cpu) break; } target_cpu = ARM_CPU(target_cpu_state); - ret = target_cpu->powered_off ? 1 : 0; + + g_assert(qemu_mutex_iothread_locked()); + ret = target_cpu->power_state; break; default: /* Everything above affinity level 0 is always on. */ diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e61bbd6b3b..24de30d92c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -1328,10 +1328,14 @@ static void handle_hint(DisasContext *s, uint32_t insn, s->is_jmp = DISAS_WFI; return; case 1: /* YIELD */ - s->is_jmp = DISAS_YIELD; + if (!parallel_cpus) { + s->is_jmp = DISAS_YIELD; + } return; case 2: /* WFE */ - s->is_jmp = DISAS_WFE; + if (!parallel_cpus) { + s->is_jmp = DISAS_WFE; + } return; case 4: /* SEV */ case 5: /* SEVL */ @@ -10929,6 +10933,10 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + /* Note that we convert the Vx register indexes into the * index within the vfp.regs[] array, so we can share the * helper with the AArch32 instructions. 
@@ -10993,6 +11001,10 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + tcg_rd_regno = tcg_const_i32(rd << 1); tcg_rn_regno = tcg_const_i32(rn << 1); tcg_rm_regno = tcg_const_i32(rm << 1); @@ -11056,6 +11068,10 @@ static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) return; } + if (!fp_access_check(s)) { + return; + } + tcg_rd_regno = tcg_const_i32(rd << 1); tcg_rn_regno = tcg_const_i32(rn << 1); diff --git a/target/arm/translate.c b/target/arm/translate.c index 4436d8f3a2..b859f10755 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -4404,20 +4404,32 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(spsr)); } +/* + * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we + * only call the helper when running single threaded TCG code to ensure + * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we + * just skip this instruction. Currently the SEV/SEVL instructions + * which are *one* of many ways to wake the CPU from WFE are not + * implemented so we can't sleep like WFI does. + */ static void gen_nop_hint(DisasContext *s, int val) { switch (val) { case 1: /* yield */ - gen_set_pc_im(s, s->pc); - s->is_jmp = DISAS_YIELD; + if (!parallel_cpus) { + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_YIELD; + } break; case 3: /* wfi */ gen_set_pc_im(s, s->pc); s->is_jmp = DISAS_WFI; break; case 2: /* wfe */ - gen_set_pc_im(s, s->pc); - s->is_jmp = DISAS_WFE; + if (!parallel_cpus) { + gen_set_pc_im(s, s->pc); + s->is_jmp = DISAS_WFE; + } break; case 4: /* sev */ case 5: /* sevl */ @@ -7978,9 +7990,13 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) TCGv_i32 addr; TCGv_i64 tmp64; - /* M variants do not implement ARM mode. */ + /* M variants do not implement ARM mode; this must raise the INVSTATE + * UsageFault exception. + */ if (arm_dc_feature(s, ARM_FEATURE_M)) { - goto illegal_op; + gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(), + default_exception_el(s)); + return; } cond = insn >> 28; if (cond == 0xf){ diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h index 8cd607e9a2..c2205e6077 100644 --- a/target/i386/cpu-qom.h +++ b/target/i386/cpu-qom.h @@ -48,7 +48,9 @@ typedef struct X86CPUDefinition X86CPUDefinition; * X86CPUClass: * @cpu_def: CPU model definition * @kvm_required: Whether CPU model requires KVM to be enabled. + * @ordering: Ordering on the "-cpu help" CPU model list. * @migration_safe: See CpuDefinitionInfo::migration_safe + * @static_model: See CpuDefinitionInfo::static * @parent_realize: The parent class' realize handler. * @parent_reset: The parent class' reset handler. * @@ -59,11 +61,15 @@ typedef struct X86CPUClass { CPUClass parent_class; /*< public >*/ - /* Should be eventually replaced by subclass-specific property defaults. */ + /* CPU definition, automatically loaded by instance_init if not NULL. + * Should be eventually replaced by subclass-specific property defaults. + */ X86CPUDefinition *cpu_def; bool kvm_required; + int ordering; bool migration_safe; + bool static_model; /* Optional description of CPU model. 
* If unavailable, cpu_def->model_id is used */ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index fd7add2521..89421c893b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -29,10 +29,16 @@ #include "qemu/option.h" #include "qemu/config-file.h" #include "qapi/qmp/qerror.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qbool.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qfloat.h" #include "qapi-types.h" #include "qapi-visit.h" #include "qapi/visitor.h" +#include "qom/qom-qobject.h" #include "sysemu/arch_init.h" #if defined(CONFIG_KVM) @@ -1503,15 +1509,15 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); -#ifdef CONFIG_KVM - static bool lmce_supported(void) { - uint64_t mce_cap; + uint64_t mce_cap = 0; +#ifdef CONFIG_KVM if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) { return false; } +#endif return !!(mce_cap & MCG_LMCE_P); } @@ -1531,51 +1537,28 @@ static int cpu_x86_fill_model_id(char *str) return 0; } -static X86CPUDefinition host_cpudef; - -static Property host_x86_cpu_properties[] = { +static Property max_x86_cpu_properties[] = { DEFINE_PROP_BOOL("migratable", X86CPU, migratable, true), DEFINE_PROP_BOOL("host-cache-info", X86CPU, cache_info_passthrough, false), DEFINE_PROP_END_OF_LIST() }; -/* class_init for the "host" CPU model - * - * This function may be called before KVM is initialized. - */ -static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +static void max_x86_cpu_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); X86CPUClass *xcc = X86_CPU_CLASS(oc); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - - xcc->kvm_required = true; - - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); - - host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); - host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); - host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); - host_cpudef.stepping = eax & 0x0F; - cpu_x86_fill_model_id(host_cpudef.model_id); + xcc->ordering = 9; - xcc->cpu_def = &host_cpudef; xcc->model_description = - "KVM processor with all supported host features " - "(only available in KVM mode)"; - - /* level, xlevel, xlevel2, and the feature words are initialized on - * instance_init, because they require KVM to be initialized. - */ + "Enables all features supported by the accelerator in the current host"; - dc->props = host_x86_cpu_properties; - /* Reason: host_x86_cpu_initfn() dies when !kvm_enabled() */ - dc->cannot_destroy_with_object_finalize_yet = true; + dc->props = max_x86_cpu_properties; } -static void host_x86_cpu_initfn(Object *obj) +static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp); + +static void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); CPUX86State *env = &cpu->env; @@ -1584,10 +1567,24 @@ static void host_x86_cpu_initfn(Object *obj) /* We can't fill the features array here because we don't know yet if * "migratable" is true or false. 
*/ - cpu->host_features = true; + cpu->max_features = true; - /* If KVM is disabled, x86_cpu_realizefn() will report an error later */ if (kvm_enabled()) { + X86CPUDefinition host_cpudef = { }; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + + host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); + + host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); + host_cpudef.family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + host_cpudef.model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + host_cpudef.stepping = eax & 0x0F; + + cpu_x86_fill_model_id(host_cpudef.model_id); + + x86_cpu_load_def(cpu, &host_cpudef, &error_abort); + env->cpuid_min_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); env->cpuid_min_xlevel = @@ -1598,15 +1595,44 @@ static void host_x86_cpu_initfn(Object *obj) if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort); } + } else { + object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD, + "vendor", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "family", &error_abort); + object_property_set_int(OBJECT(cpu), 6, "model", &error_abort); + object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort); + object_property_set_str(OBJECT(cpu), + "QEMU TCG CPU version " QEMU_HW_VERSION, + "model-id", &error_abort); } object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort); } +static const TypeInfo max_x86_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("max"), + .parent = TYPE_X86_CPU, + .instance_init = max_x86_cpu_initfn, + .class_init = max_x86_cpu_class_init, +}; + +#ifdef CONFIG_KVM + +static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->kvm_required = true; + xcc->ordering = 8; + + xcc->model_description = + "KVM processor with all supported host features " + "(only available in KVM mode)"; +} + static const TypeInfo host_x86_cpu_type_info = { .name = X86_CPU_TYPE_NAME("host"), - .parent = TYPE_X86_CPU, - .instance_init = host_x86_cpu_initfn, + .parent = X86_CPU_TYPE_NAME("max"), .class_init = host_x86_cpu_class_init, }; @@ -2033,12 +2059,11 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, /* Special case: */ if (!strcmp(name, "tsc-freq")) { - int64_t tsc_freq; - char *err; + int ret; + uint64_t tsc_freq; - tsc_freq = qemu_strtosz_suffix_unit(val, &err, - QEMU_STRTOSZ_DEFSUFFIX_B, 1000); - if (tsc_freq < 0 || *err) { + ret = qemu_strtosz_metric(val, NULL, &tsc_freq); + if (ret < 0 || tsc_freq > INT64_MAX) { error_setg(errp, "bad numerical value %s", val); return; } @@ -2061,7 +2086,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_load_features(X86CPU *cpu, Error **errp); +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); static int x86_cpu_filter_features(X86CPU *cpu); /* Check for missing features that may prevent the CPU class from @@ -2084,9 +2109,9 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); - x86_cpu_load_features(xc, &err); + x86_cpu_expand_features(xc, &err); if (err) { - /* Errors at x86_cpu_load_features should never happen, + /* Errors at x86_cpu_expand_features should never happen, * but in case it does, just report the model as not * runnable at all using the "type" property. 
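The "host" model's class_init no longer probes the host CPUID; that probing now happens in max_x86_cpu_initfn() at instance_init time, and only when KVM is enabled, while TCG falls back to fixed AMD-flavoured defaults. For reference, the same leaf-0/leaf-1 decode as a small standalone program (this assumes an x86 host and GCC/Clang's <cpuid.h>; QEMU itself goes through its own host_cpuid() wrapper instead):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    unsigned int family, model, stepping;
    char vendor[13] = { 0 };

    /* Leaf 0: the vendor string comes back in EBX, EDX, ECX, in that order,
     * which is the same order x86_cpu_vendor_words2str() expects. */
    if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
        return 1;
    }
    memcpy(vendor + 0, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);

    /* Leaf 1: family/model/stepping, folding in the extended fields the
     * same way max_x86_cpu_initfn() does above. */
    __get_cpuid(1, &eax, &ebx, &ecx, &edx);
    family   = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
    model    = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12);
    stepping = eax & 0x0F;

    printf("%s family %u model %u stepping %u\n",
           vendor, family, model, stepping);
    return 0;
}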
*/ @@ -2129,7 +2154,7 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset) } } -/* Sort alphabetically by type name, listing kvm_required models last. */ +/* Sort alphabetically by type name, respecting X86CPUClass::ordering. */ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) { ObjectClass *class_a = (ObjectClass *)a; @@ -2138,9 +2163,8 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) X86CPUClass *cc_b = X86_CPU_CLASS(class_b); const char *name_a, *name_b; - if (cc_a->kvm_required != cc_b->kvm_required) { - /* kvm_required items go last */ - return cc_a->kvm_required ? 1 : -1; + if (cc_a->ordering != cc_b->ordering) { + return cc_a->ordering - cc_b->ordering; } else { name_a = object_class_get_name(class_a); name_b = object_class_get_name(class_b); @@ -2162,7 +2186,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) CPUListState *s = user_data; char *name = x86_cpu_class_get_model_name(cc); const char *desc = cc->model_description; - if (!desc) { + if (!desc && cc->cpu_def) { desc = cc->cpu_def->model_id; } @@ -2211,6 +2235,7 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data) info->q_typename = g_strdup(object_class_get_name(oc)); info->migration_safe = cc->migration_safe; info->has_migration_safe = true; + info->q_static = cc->static_model; entry = g_malloc0(sizeof(*entry)); entry->value = info; @@ -2248,31 +2273,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } -/* - * Filters CPU feature words based on host availability of each feature. - * - * Returns: 0 if all flags are supported by the host, non-zero otherwise. - */ -static int x86_cpu_filter_features(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - FeatureWord w; - int rv = 0; - - for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = - x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; - env->features[w] &= host_feat; - cpu->filtered_features[w] = requested_features & ~env->features[w]; - if (cpu->filtered_features[w]) { - rv = 1; - } - } - - return rv; -} - static void x86_cpu_report_filtered_features(X86CPU *cpu) { FeatureWord w; @@ -2294,7 +2294,7 @@ static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) } } -/* Load data from X86CPUDefinition +/* Load data from X86CPUDefinition into a X86CPU object */ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) { @@ -2303,6 +2303,11 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) char host_vendor[CPUID_VENDOR_SZ + 1]; FeatureWord w; + /*NOTE: any property set by this function should be returned by + * x86_cpu_static_props(), so static expansion of + * query-cpu-model-expansion is always complete. + */ + /* CPU models only set _minimum_ values for level/xlevel: */ object_property_set_int(OBJECT(cpu), def->level, "min-level", errp); object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp); @@ -2347,6 +2352,212 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) } +/* Return a QDict containing keys for all properties that can be included + * in static expansion of CPU models. All properties set by x86_cpu_load_def() + * must be included in the dictionary. 
+ */ +static QDict *x86_cpu_static_props(void) +{ + FeatureWord w; + int i; + static const char *props[] = { + "min-level", + "min-xlevel", + "family", + "model", + "stepping", + "model-id", + "vendor", + "lmce", + NULL, + }; + static QDict *d; + + if (d) { + return d; + } + + d = qdict_new(); + for (i = 0; props[i]; i++) { + qdict_put_obj(d, props[i], qnull()); + } + + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *fi = &feature_word_info[w]; + int bit; + for (bit = 0; bit < 32; bit++) { + if (!fi->feat_names[bit]) { + continue; + } + qdict_put_obj(d, fi->feat_names[bit], qnull()); + } + } + + return d; +} + +/* Add an entry to @props dict, with the value for property. */ +static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop) +{ + QObject *value = object_property_get_qobject(OBJECT(cpu), prop, + &error_abort); + + qdict_put_obj(props, prop, value); +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model. + */ +static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) +{ + QDict *sprops = x86_cpu_static_props(); + const QDictEntry *e; + + for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) { + const char *prop = qdict_entry_key(e); + x86_cpu_expand_prop(cpu, props, prop); + } +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model, including every + * writeable QOM property. + */ +static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) +{ + ObjectPropertyIterator iter; + ObjectProperty *prop; + + object_property_iter_init(&iter, OBJECT(cpu)); + while ((prop = object_property_iter_next(&iter))) { + /* skip read-only or write-only properties */ + if (!prop->get || !prop->set) { + continue; + } + + /* "hotplugged" is the only property that is configurable + * on the command-line but will be set differently on CPUs + * created using "-cpu ... -smp ..." and by CPUs created + * on the fly by x86_cpu_from_model() for querying. Skip it. 
+ */ + if (!strcmp(prop->name, "hotplugged")) { + continue; + } + x86_cpu_expand_prop(cpu, props, prop->name); + } +} + +static void object_apply_props(Object *obj, QDict *props, Error **errp) +{ + const QDictEntry *prop; + Error *err = NULL; + + for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) { + object_property_set_qobject(obj, qdict_entry_value(prop), + qdict_entry_key(prop), &err); + if (err) { + break; + } + } + + error_propagate(errp, err); +} + +/* Create X86CPU object according to model+props specification */ +static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp) +{ + X86CPU *xc = NULL; + X86CPUClass *xcc; + Error *err = NULL; + + xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model)); + if (xcc == NULL) { + error_setg(&err, "CPU model '%s' not found", model); + goto out; + } + + xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); + if (props) { + object_apply_props(OBJECT(xc), props, &err); + if (err) { + goto out; + } + } + + x86_cpu_expand_features(xc, &err); + if (err) { + goto out; + } + +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(xc)); + xc = NULL; + } + return xc; +} + +CpuModelExpansionInfo * +arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + X86CPU *xc = NULL; + Error *err = NULL; + CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1); + QDict *props = NULL; + const char *base_name; + + xc = x86_cpu_from_model(model->name, + model->has_props ? + qobject_to_qdict(model->props) : + NULL, &err); + if (err) { + goto out; + } + + props = qdict_new(); + + switch (type) { + case CPU_MODEL_EXPANSION_TYPE_STATIC: + /* Static expansion will be based on "base" only */ + base_name = "base"; + x86_cpu_to_dict(xc, props); + break; + case CPU_MODEL_EXPANSION_TYPE_FULL: + /* As we don't return every single property, full expansion needs + * to keep the original model name+props, and add extra + * properties on top of that. + */ + base_name = model->name; + x86_cpu_to_dict_full(xc, props); + break; + default: + error_setg(&err, "Unsupportted expansion type"); + goto out; + } + + if (!props) { + props = qdict_new(); + } + x86_cpu_to_dict(xc, props); + + ret->model = g_new0(CpuModelInfo, 1); + ret->model->name = g_strdup(base_name); + ret->model->props = QOBJECT(props); + ret->model->has_props = true; + +out: + object_unref(OBJECT(xc)); + if (err) { + error_propagate(errp, err); + qapi_free_CpuModelExpansionInfo(ret); + ret = NULL; + } + return ret; +} + X86CPU *cpu_x86_init(const char *cpu_model) { return X86_CPU(cpu_generic_init(TYPE_X86_CPU, cpu_model)); @@ -3096,20 +3307,59 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; } -/* Load CPUID data based on configured features */ -static void x86_cpu_load_features(X86CPU *cpu, Error **errp) +/***** Steps involved on loading and filtering CPUID data + * + * When initializing and realizing a CPU object, the steps + * involved in setting up CPUID data are: + * + * 1) Loading CPU model definition (X86CPUDefinition). This is + * implemented by x86_cpu_load_def() and should be completely + * transparent, as it is done automatically by instance_init. + * No code should need to look at X86CPUDefinition structs + * outside instance_init. + * + * 2) CPU expansion. This is done by realize before CPUID + * filtering, and will make sure host/accelerator data is + * loaded for CPU models that depend on host capabilities + * (e.g. "host"). 
Done by x86_cpu_expand_features(). + * + * 3) CPUID filtering. This initializes extra data related to + * CPUID, and checks if the host supports all capabilities + * required by the CPU. Runnability of a CPU model is + * determined at this step. Done by x86_cpu_filter_features(). + * + * Some operations don't require all steps to be performed. + * More precisely: + * + * - CPU instance creation (instance_init) will run only CPU + * model loading. CPU expansion can't run at instance_init-time + * because host/accelerator data may be not available yet. + * - CPU realization will perform both CPU model expansion and CPUID + * filtering, and return an error in case one of them fails. + * - query-cpu-definitions needs to run all 3 steps. It needs + * to run CPUID filtering, as the 'unavailable-features' + * field is set based on the filtering results. + * - The query-cpu-model-expansion QMP command only needs to run + * CPU model loading and CPU expansion. It should not filter + * any CPUID data based on host capabilities. + */ + +/* Expand CPU configuration data, based on configured features + * and host/accelerator capabilities when appropriate. + */ +static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = &cpu->env; FeatureWord w; GList *l; Error *local_err = NULL; - /*TODO: cpu->host_features incorrectly overwrites features + /*TODO: cpu->max_features incorrectly overwrites features * set using "feat=on|off". Once we fix this, we can convert * plus_features & minus_features to global properties * inside x86_cpu_parse_featurestr() too. */ - if (cpu->host_features) { + if (cpu->max_features) { for (w = 0; w < FEATURE_WORDS; w++) { env->features[w] = x86_cpu_get_supported_feature_word(w, cpu->migratable); @@ -3174,6 +3424,32 @@ out: } } +/* + * Finishes initialization of CPUID data, filters CPU feature + * words based on host availability of each feature. + * + * Returns: 0 if all flags are supported by the host, non-zero otherwise. 
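The comment block above spells out the load / expand / filter pipeline for CPUID data. A reduced sketch of steps 2 and 3, using plain feature-word bitmaps instead of the real X86CPU and QOM machinery (every name below is illustrative, not a QEMU API):

#include <stdbool.h>
#include <stdint.h>

#define FEATURE_WORDS 2

typedef struct ToyCPU {
    bool max_features;                /* like X86CPU::max_features      */
    uint32_t features[FEATURE_WORDS]; /* requested feature words        */
    uint32_t filtered[FEATURE_WORDS]; /* bits dropped by filtering      */
} ToyCPU;

/* Stand-in for x86_cpu_get_supported_feature_word(): what the
 * accelerator/host can actually provide. */
static uint32_t host_supported(int w)
{
    static const uint32_t host[FEATURE_WORDS] = { 0x00ff00ff, 0x0000ffff };
    return host[w];
}

/* Step 2: expansion. A "max"-style model simply requests everything the
 * host offers; named models keep whatever was configured. */
static void expand_features(ToyCPU *cpu)
{
    int w;

    if (cpu->max_features) {
        for (w = 0; w < FEATURE_WORDS; w++) {
            cpu->features[w] = host_supported(w);
        }
    }
}

/* Step 3: filtering. Clamp to host capabilities and remember what was
 * dropped, so unavailable-features style reporting stays possible. */
static int filter_features(ToyCPU *cpu)
{
    int w, filtered_any = 0;

    for (w = 0; w < FEATURE_WORDS; w++) {
        uint32_t requested = cpu->features[w];
        cpu->features[w] &= host_supported(w);
        cpu->filtered[w] = requested & ~cpu->features[w];
        filtered_any |= !!cpu->filtered[w];
    }
    return filtered_any;
}

In this toy model, realize would run expand_features() and then filter_features() and report anything left in filtered[], while a query-cpu-model-expansion style caller would stop after expand_features(), which is exactly the split the comment above describes.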
+ */ +static int x86_cpu_filter_features(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + FeatureWord w; + int rv = 0; + + for (w = 0; w < FEATURE_WORDS; w++) { + uint32_t host_feat = + x86_cpu_get_supported_feature_word(w, false); + uint32_t requested_features = env->features[w]; + env->features[w] &= host_feat; + cpu->filtered_features[w] = requested_features & ~env->features[w]; + if (cpu->filtered_features[w]) { + rv = 1; + } + } + + return rv; +} + #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) @@ -3201,7 +3477,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) return; } - x86_cpu_load_features(cpu, &local_err); + x86_cpu_expand_features(cpu, &local_err); if (local_err) { goto out; } @@ -3620,7 +3896,9 @@ static void x86_cpu_initfn(Object *obj) object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort); object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort); - x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + if (xcc->cpu_def) { + x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + } } static int64_t x86_cpu_get_arch_id(CPUState *cs) @@ -3775,6 +4053,24 @@ static const TypeInfo x86_cpu_type_info = { .class_init = x86_cpu_common_class_init, }; + +/* "base" CPU model, used by query-cpu-model-expansion */ +static void x86_cpu_base_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->static_model = true; + xcc->migration_safe = true; + xcc->model_description = "base CPU model type with no features enabled"; + xcc->ordering = 8; +} + +static const TypeInfo x86_base_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("base"), + .parent = TYPE_X86_CPU, + .class_init = x86_cpu_base_class_init, +}; + static void x86_cpu_register_types(void) { int i; @@ -3783,6 +4079,8 @@ static void x86_cpu_register_types(void) for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { x86_register_cpudef_type(&builtin_x86_defs[i]); } + type_register_static(&max_x86_cpu_type_info); + type_register_static(&x86_base_cpu_type_info); #ifdef CONFIG_KVM type_register_static(&host_x86_cpu_type_info); #endif diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8df124f332..12a39d590f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1211,7 +1211,7 @@ struct X86CPU { bool enforce_cpuid; bool expose_kvm; bool migratable; - bool host_features; + bool max_features; /* Enable all supported features automatically */ uint32_t apic_id; /* Enables publishing of TSC increment and Local APIC bus frequencies to @@ -1417,6 +1417,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper); void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); +void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); +void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); /* you can call this signal handler from your SIGBUS and SIGSEGV signal handlers to inform the virtual CPU of exceptions. 
non zero diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c index 66474ad98e..69ea33a5c2 100644 --- a/target/i386/fpu_helper.c +++ b/target/i386/fpu_helper.c @@ -1377,6 +1377,18 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) } } +#if defined(CONFIG_USER_ONLY) +void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +{ + helper_fxsave(env, ptr); +} + +void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +{ + helper_fxrstor(env, ptr); +} +#endif + void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { uintptr_t ra = GETPC(); diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 468aa073bc..77ead60437 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -210,6 +210,10 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) CPUArchState *env; env = mon_get_cpu_env(); + if (!env) { + monitor_printf(mon, "No CPU available\n"); + return; + } if (!(env->cr[0] & CR0_PG_MASK)) { monitor_printf(mon, "PG disabled\n"); @@ -529,6 +533,10 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict) CPUArchState *env; env = mon_get_cpu_env(); + if (!env) { + monitor_printf(mon, "No CPU available\n"); + return; + } if (!(env->cr[0] & CR0_PG_MASK)) { monitor_printf(mon, "PG disabled\n"); @@ -624,7 +632,13 @@ const MonitorDef *target_monitor_defs(void) void hmp_info_local_apic(Monitor *mon, const QDict *qdict) { - x86_cpu_dump_local_apic_state(mon_get_cpu(), (FILE *)mon, monitor_fprintf, + CPUState *cs = mon_get_cpu(); + + if (!cs) { + monitor_printf(mon, "No CPU available\n"); + return; + } + x86_cpu_dump_local_apic_state(cs, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU); } diff --git a/target/i386/smm_helper.c b/target/i386/smm_helper.c index 4dd6a2c544..f051a77c4a 100644 --- a/target/i386/smm_helper.c +++ b/target/i386/smm_helper.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" #include "exec/log.h" @@ -42,11 +43,14 @@ void helper_rsm(CPUX86State *env) #define SMM_REVISION_ID 0x00020000 #endif +/* Called with iothread lock taken */ void cpu_smm_update(X86CPU *cpu) { CPUX86State *env = &cpu->env; bool smm_enabled = (env->hflags & HF_SMM_MASK); + g_assert(qemu_mutex_iothread_locked()); + if (cpu->smram) { memory_region_set_enabled(cpu->smram, smm_enabled); } @@ -333,7 +337,10 @@ void helper_rsm(CPUX86State *env) } env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; env->hflags &= ~HF_SMM_MASK; + + qemu_mutex_lock_iothread(); cpu_smm_update(cpu); + qemu_mutex_unlock_iothread(); qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP); diff --git a/target/mips/cpu.h b/target/mips/cpu.h index e1c78f55ec..4a4747af25 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -815,6 +815,7 @@ int cpu_mips_signal_handler(int host_signum, void *pinfo, void *puc); #define cpu_init(cpu_model) CPU(cpu_mips_init(cpu_model)) bool cpu_supports_cps_smp(const char *cpu_model); +bool cpu_supports_isa(const char *cpu_model, unsigned int isa); void cpu_set_exception_base(int vp_index, target_ulong address); /* TODO QOM'ify CPU reset and remove */ diff --git a/target/mips/translate.c b/target/mips/translate.c index 7f8ecf42c2..8b4a072ecb 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -20233,6 +20233,16 @@ bool cpu_supports_cps_smp(const char *cpu_model) return (def->CP0_Config3 & (1 << CP0C3_CMGCR)) != 0; } +bool cpu_supports_isa(const char *cpu_model, unsigned int isa) +{ + const mips_def_t *def = cpu_mips_find_by_name(cpu_model); + 
if (!def) { + return false; + } + + return (def->insn_flags & isa) != 0; +} + void cpu_set_exception_base(int vp_index, target_ulong address) { MIPSCPU *vp = MIPS_CPU(qemu_get_cpu(vp_index)); diff --git a/target/ppc/Makefile.objs b/target/ppc/Makefile.objs index a8c7a30cde..0057b319c0 100644 --- a/target/ppc/Makefile.objs +++ b/target/ppc/Makefile.objs @@ -1,8 +1,9 @@ obj-y += cpu-models.o +obj-y += cpu.o obj-y += translate.o ifeq ($(CONFIG_SOFTMMU),y) -obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o -obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o compat.o +obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o arch_dump.o +obj-$(TARGET_PPC64) += mmu-hash64.o compat.o endif obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index 40282a1f50..28d9cc7d79 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -1,5 +1,5 @@ /* - * writing ELF notes for ppc64 arch + * writing ELF notes for ppc{64,} arch * * * Copyright IBM, Corp. 2013 @@ -19,36 +19,48 @@ #include "sysemu/dump.h" #include "sysemu/kvm.h" -struct PPC64UserRegStruct { - uint64_t gpr[32]; - uint64_t nip; - uint64_t msr; - uint64_t orig_gpr3; - uint64_t ctr; - uint64_t link; - uint64_t xer; - uint64_t ccr; - uint64_t softe; - uint64_t trap; - uint64_t dar; - uint64_t dsisr; - uint64_t result; +#ifdef TARGET_PPC64 +#define ELFCLASS ELFCLASS64 +#define cpu_to_dump_reg cpu_to_dump64 +typedef uint64_t reg_t; +typedef Elf64_Nhdr Elf_Nhdr; +#else +#define ELFCLASS ELFCLASS32 +#define cpu_to_dump_reg cpu_to_dump32 +typedef uint32_t reg_t; +typedef Elf32_Nhdr Elf_Nhdr; +#endif /* TARGET_PPC64 */ + +struct PPCUserRegStruct { + reg_t gpr[32]; + reg_t nip; + reg_t msr; + reg_t orig_gpr3; + reg_t ctr; + reg_t link; + reg_t xer; + reg_t ccr; + reg_t softe; + reg_t trap; + reg_t dar; + reg_t dsisr; + reg_t result; } QEMU_PACKED; -struct PPC64ElfPrstatus { +struct PPCElfPrstatus { char pad1[112]; - struct PPC64UserRegStruct pr_reg; - uint64_t pad2[4]; + struct PPCUserRegStruct pr_reg; + reg_t pad2[4]; } QEMU_PACKED; -struct PPC64ElfFpregset { +struct PPCElfFpregset { uint64_t fpr[32]; - uint64_t fpscr; + reg_t fpscr; } QEMU_PACKED; -struct PPC64ElfVmxregset { +struct PPCElfVmxregset { ppc_avr_t avr[32]; ppc_avr_t vscr; union { @@ -57,26 +69,26 @@ struct PPC64ElfVmxregset { } vrsave; } QEMU_PACKED; -struct PPC64ElfVsxregset { +struct PPCElfVsxregset { uint64_t vsr[32]; } QEMU_PACKED; -struct PPC64ElfSperegset { +struct PPCElfSperegset { uint32_t evr[32]; uint64_t spe_acc; uint32_t spe_fscr; } QEMU_PACKED; typedef struct noteStruct { - Elf64_Nhdr hdr; + Elf_Nhdr hdr; char name[5]; char pad3[3]; union { - struct PPC64ElfPrstatus prstatus; - struct PPC64ElfFpregset fpregset; - struct PPC64ElfVmxregset vmxregset; - struct PPC64ElfVsxregset vsxregset; - struct PPC64ElfSperegset speregset; + struct PPCElfPrstatus prstatus; + struct PPCElfFpregset fpregset; + struct PPCElfVmxregset vmxregset; + struct PPCElfVsxregset vsxregset; + struct PPCElfSperegset speregset; } contents; } QEMU_PACKED Note; @@ -85,12 +97,12 @@ typedef struct NoteFuncArg { DumpState *state; } NoteFuncArg; -static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - uint64_t cr; - struct PPC64ElfPrstatus *prstatus; - struct PPC64UserRegStruct *reg; + reg_t cr; + struct PPCElfPrstatus *prstatus; + struct PPCUserRegStruct *reg; Note *note = &arg->note; DumpState *s = arg->state; @@ -101,25 
+113,25 @@ static void ppc64_write_elf64_prstatus(NoteFuncArg *arg, PowerPCCPU *cpu) reg = &prstatus->pr_reg; for (i = 0; i < 32; i++) { - reg->gpr[i] = cpu_to_dump64(s, cpu->env.gpr[i]); + reg->gpr[i] = cpu_to_dump_reg(s, cpu->env.gpr[i]); } - reg->nip = cpu_to_dump64(s, cpu->env.nip); - reg->msr = cpu_to_dump64(s, cpu->env.msr); - reg->ctr = cpu_to_dump64(s, cpu->env.ctr); - reg->link = cpu_to_dump64(s, cpu->env.lr); - reg->xer = cpu_to_dump64(s, cpu_read_xer(&cpu->env)); + reg->nip = cpu_to_dump_reg(s, cpu->env.nip); + reg->msr = cpu_to_dump_reg(s, cpu->env.msr); + reg->ctr = cpu_to_dump_reg(s, cpu->env.ctr); + reg->link = cpu_to_dump_reg(s, cpu->env.lr); + reg->xer = cpu_to_dump_reg(s, cpu_read_xer(&cpu->env)); cr = 0; for (i = 0; i < 8; i++) { cr |= (cpu->env.crf[i] & 15) << (4 * (7 - i)); } - reg->ccr = cpu_to_dump64(s, cr); + reg->ccr = cpu_to_dump_reg(s, cr); } -static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfFpregset *fpregset; + struct PPCElfFpregset *fpregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -131,13 +143,13 @@ static void ppc64_write_elf64_fpregset(NoteFuncArg *arg, PowerPCCPU *cpu) for (i = 0; i < 32; i++) { fpregset->fpr[i] = cpu_to_dump64(s, cpu->env.fpr[i]); } - fpregset->fpscr = cpu_to_dump64(s, cpu->env.fpscr); + fpregset->fpscr = cpu_to_dump_reg(s, cpu->env.fpscr); } -static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) +static void ppc_write_elf_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfVmxregset *vmxregset; + struct PPCElfVmxregset *vmxregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -164,10 +176,11 @@ static void ppc64_write_elf64_vmxregset(NoteFuncArg *arg, PowerPCCPU *cpu) } vmxregset->vscr.u32[3] = cpu_to_dump32(s, cpu->env.vscr); } -static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) + +static void ppc_write_elf_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) { int i; - struct PPC64ElfVsxregset *vsxregset; + struct PPCElfVsxregset *vsxregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -179,9 +192,10 @@ static void ppc64_write_elf64_vsxregset(NoteFuncArg *arg, PowerPCCPU *cpu) vsxregset->vsr[i] = cpu_to_dump64(s, cpu->env.vsr[i]); } } -static void ppc64_write_elf64_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) + +static void ppc_write_elf_speregset(NoteFuncArg *arg, PowerPCCPU *cpu) { - struct PPC64ElfSperegset *speregset; + struct PPCElfSperegset *speregset; Note *note = &arg->note; DumpState *s = arg->state; @@ -197,11 +211,11 @@ static const struct NoteFuncDescStruct { int contents_size; void (*note_contents_func)(NoteFuncArg *arg, PowerPCCPU *cpu); } note_func[] = { - {sizeof(((Note *)0)->contents.prstatus), ppc64_write_elf64_prstatus}, - {sizeof(((Note *)0)->contents.fpregset), ppc64_write_elf64_fpregset}, - {sizeof(((Note *)0)->contents.vmxregset), ppc64_write_elf64_vmxregset}, - {sizeof(((Note *)0)->contents.vsxregset), ppc64_write_elf64_vsxregset}, - {sizeof(((Note *)0)->contents.speregset), ppc64_write_elf64_speregset}, + {sizeof(((Note *)0)->contents.prstatus), ppc_write_elf_prstatus}, + {sizeof(((Note *)0)->contents.fpregset), ppc_write_elf_fpregset}, + {sizeof(((Note *)0)->contents.vmxregset), ppc_write_elf_vmxregset}, + {sizeof(((Note *)0)->contents.vsxregset), ppc_write_elf_vsxregset}, + {sizeof(((Note *)0)->contents.speregset), ppc_write_elf_speregset}, { 0, NULL} }; @@ -213,8 +227,9 @@ int 
cpu_get_dump_info(ArchDumpInfo *info, PowerPCCPU *cpu = POWERPC_CPU(first_cpu); PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); - info->d_machine = EM_PPC64; - info->d_class = ELFCLASS64; + info->d_machine = PPC_ELF_MACHINE; + info->d_class = ELFCLASS; + if ((*pcc->interrupts_big_endian)(cpu)) { info->d_endian = ELFDATA2MSB; } else { @@ -236,25 +251,19 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) int note_head_size; const NoteFuncDesc *nf; - if (class != ELFCLASS64) { - return -1; - } - assert(machine == EM_PPC64); - - note_head_size = sizeof(Elf64_Nhdr); - + note_head_size = sizeof(Elf_Nhdr); for (nf = note_func; nf->note_contents_func; nf++) { elf_note_size = elf_note_size + note_head_size + name_size + - nf->contents_size; + nf->contents_size; } return (elf_note_size) * nr_cpus; } -static int ppc64_write_all_elf64_notes(const char *note_name, - WriteCoreDumpFunction f, - PowerPCCPU *cpu, int id, - void *opaque) +static int ppc_write_all_elf_notes(const char *note_name, + WriteCoreDumpFunction f, + PowerPCCPU *cpu, int id, + void *opaque) { NoteFuncArg arg = { .state = opaque }; int ret = -1; @@ -282,5 +291,12 @@ int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque) { PowerPCCPU *cpu = POWERPC_CPU(cs); - return ppc64_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); +} + +int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); } diff --git a/target/ppc/cpu-qom.h b/target/ppc/cpu-qom.h index b7977bad18..4e3132b56b 100644 --- a/target/ppc/cpu-qom.h +++ b/target/ppc/cpu-qom.h @@ -86,10 +86,13 @@ enum powerpc_mmu_t { POWERPC_MMU_2_07 = POWERPC_MMU_64 | POWERPC_MMU_1TSEG | POWERPC_MMU_64K | POWERPC_MMU_AMR | 0x00000004, - /* FIXME Add POWERPC_MMU_3_OO defines */ /* Architecture 2.07 "degraded" (no 1T segments) */ POWERPC_MMU_2_07a = POWERPC_MMU_64 | POWERPC_MMU_AMR | 0x00000004, + /* Architecture 3.00 variant */ + POWERPC_MMU_3_00 = POWERPC_MMU_64 | POWERPC_MMU_1TSEG + | POWERPC_MMU_64K + | POWERPC_MMU_AMR | 0x00000005, }; /*****************************************************************************/ diff --git a/target/ppc/cpu.c b/target/ppc/cpu.c new file mode 100644 index 0000000000..28011668e7 --- /dev/null +++ b/target/ppc/cpu.c @@ -0,0 +1,47 @@ +/* + * PowerPC CPU routines for qemu. + * + * Copyright (c) 2017 Nikunj A Dadhania, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu-models.h" + +target_ulong cpu_read_xer(CPUPPCState *env) +{ + if (is_isa300(env)) { + return env->xer | (env->so << XER_SO) | + (env->ov << XER_OV) | (env->ca << XER_CA) | + (env->ov32 << XER_OV32) | (env->ca32 << XER_CA32); + } + + return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | + (env->ca << XER_CA); +} + +void cpu_write_xer(CPUPPCState *env, target_ulong xer) +{ + env->so = (xer >> XER_SO) & 1; + env->ov = (xer >> XER_OV) & 1; + env->ca = (xer >> XER_CA) & 1; + /* write all the flags, while reading back check of isa300 */ + env->ov32 = (xer >> XER_OV32) & 1; + env->ca32 = (xer >> XER_CA32) & 1; + env->xer = xer & ~((1ul << XER_SO) | + (1ul << XER_OV) | (1ul << XER_CA) | + (1ul << XER_OV32) | (1ul << XER_CA32)); +} diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index bc2a2ce431..d33c17e646 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -223,11 +223,12 @@ enum { typedef struct opc_handler_t opc_handler_t; /*****************************************************************************/ -/* Types used to describe some PowerPC registers */ +/* Types used to describe some PowerPC registers etc. */ typedef struct DisasContext DisasContext; typedef struct ppc_spr_t ppc_spr_t; typedef union ppc_avr_t ppc_avr_t; typedef union ppc_tlb_t ppc_tlb_t; +typedef struct ppc_hash_pte64 ppc_hash_pte64_t; /* SPR access micro-ops generations callbacks */ struct ppc_spr_t { @@ -305,14 +306,6 @@ union ppc_tlb_t { #define TLB_MAS 3 #endif -#define SDR_32_HTABORG 0xFFFF0000UL -#define SDR_32_HTABMASK 0x000001FFUL - -#if defined(TARGET_PPC64) -#define SDR_64_HTABORG 0xFFFFFFFFFFFC0000ULL -#define SDR_64_HTABSIZE 0x000000000000001FULL -#endif /* defined(TARGET_PPC64 */ - typedef struct ppc_slb_t ppc_slb_t; struct ppc_slb_t { uint64_t esid; @@ -381,15 +374,22 @@ struct ppc_slb_t { #define LPCR_ISL (1ull << (63 - 2)) #define LPCR_KBV (1ull << (63 - 3)) #define LPCR_DPFD_SHIFT (63 - 11) -#define LPCR_DPFD (0x3ull << LPCR_DPFD_SHIFT) +#define LPCR_DPFD (0x7ull << LPCR_DPFD_SHIFT) #define LPCR_VRMASD_SHIFT (63 - 16) #define LPCR_VRMASD (0x1full << LPCR_VRMASD_SHIFT) +/* P9: Power-saving mode Exit Cause Enable (Upper Section) Mask */ +#define LPCR_PECE_U_SHIFT (63 - 19) +#define LPCR_PECE_U_MASK (0x7ull << LPCR_PECE_U_SHIFT) +#define LPCR_HVEE (1ull << (63 - 17)) /* Hypervisor Virt Exit Enable */ #define LPCR_RMLS_SHIFT (63 - 37) #define LPCR_RMLS (0xfull << LPCR_RMLS_SHIFT) #define LPCR_ILE (1ull << (63 - 38)) #define LPCR_AIL_SHIFT (63 - 40) /* Alternate interrupt location */ #define LPCR_AIL (3ull << LPCR_AIL_SHIFT) +#define LPCR_UPRT (1ull << (63 - 41)) /* Use Process Table */ +#define LPCR_EVIRT (1ull << (63 - 42)) /* Enhanced Virtualisation */ #define LPCR_ONL (1ull << (63 - 45)) +#define LPCR_LD (1ull << (63 - 46)) /* Large Decrementer */ #define LPCR_P7_PECE0 (1ull << (63 - 49)) #define LPCR_P7_PECE1 (1ull << (63 - 50)) #define LPCR_P7_PECE2 (1ull << (63 - 51)) @@ -398,11 +398,22 @@ struct ppc_slb_t { #define LPCR_P8_PECE2 (1ull << (63 - 49)) #define LPCR_P8_PECE3 (1ull << (63 - 50)) #define LPCR_P8_PECE4 (1ull << (63 - 51)) +/* P9: Power-saving mode Exit Cause Enable (Lower Section) Mask */ +#define LPCR_PECE_L_SHIFT (63 - 51) +#define LPCR_PECE_L_MASK (0x1full << LPCR_PECE_L_SHIFT) +#define LPCR_PDEE (1ull << (63 - 47)) /* Privileged Doorbell Exit EN */ +#define LPCR_HDEE (1ull << (63 - 48)) /* Hyperv Doorbell Exit Enable */ +#define LPCR_EEE (1ull << (63 - 49)) /* External Exit Enable */ +#define LPCR_DEE (1ull << (63 - 
50)) /* Decrementer Exit Enable */ +#define LPCR_OEE (1ull << (63 - 51)) /* Other Exit Enable */ #define LPCR_MER (1ull << (63 - 52)) +#define LPCR_GTSE (1ull << (63 - 53)) /* Guest Translation Shootdown */ #define LPCR_TC (1ull << (63 - 54)) +#define LPCR_HEIC (1ull << (63 - 59)) /* HV Extern Interrupt Control */ #define LPCR_LPES0 (1ull << (63 - 60)) #define LPCR_LPES1 (1ull << (63 - 61)) #define LPCR_RMI (1ull << (63 - 62)) +#define LPCR_HVICE (1ull << (63 - 62)) /* HV Virtualisation Int Enable */ #define LPCR_HDICE (1ull << (63 - 63)) #define msr_sf ((env->msr >> MSR_SF) & 1) @@ -947,6 +958,8 @@ struct CPUPPCState { target_ulong so; target_ulong ov; target_ulong ca; + target_ulong ov32; + target_ulong ca32; /* Reservation address */ target_ulong reserve_addr; /* Reservation value */ @@ -987,12 +1000,7 @@ struct CPUPPCState { /* tcg TLB needs flush (deferred slb inval instruction typically) */ #endif /* segment registers */ - hwaddr htab_base; - /* mask used to normalize hash value to PTEG index */ - hwaddr htab_mask; target_ulong sr[32]; - /* externally stored hash table */ - uint8_t *external_htab; /* BATs */ uint32_t nb_BATs; target_ulong DBAT[2][8]; @@ -1200,6 +1208,14 @@ struct PPCVirtualHypervisor { struct PPCVirtualHypervisorClass { InterfaceClass parent; void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu); + hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp); + const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n); + void (*unmap_hptes)(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n); + void (*store_hpte)(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte0, uint64_t pte1); }; #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor" @@ -1225,6 +1241,8 @@ int ppc_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_write_register_apple(CPUState *cpu, uint8_t *buf, int reg); int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque); +int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + int cpuid, void *opaque); #ifndef CONFIG_USER_ONLY void ppc_cpu_do_system_reset(CPUState *cs); extern const struct VMStateDescription vmstate_ppc_cpu; @@ -1282,8 +1300,7 @@ void store_booke_tcr (CPUPPCState *env, target_ulong val); void store_booke_tsr (CPUPPCState *env, target_ulong val); void ppc_tlb_invalidate_all (CPUPPCState *env); void ppc_tlb_invalidate_one (CPUPPCState *env, target_ulong addr); -void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); -void cpu_ppc_set_papr(PowerPCCPU *cpu); +void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); #endif #endif @@ -1354,11 +1371,15 @@ int ppc_compat_max_threads(PowerPCCPU *cpu); #define XER_SO 31 #define XER_OV 30 #define XER_CA 29 +#define XER_OV32 19 +#define XER_CA32 18 #define XER_CMP 8 #define XER_BC 0 #define xer_so (env->so) #define xer_ov (env->ov) #define xer_ca (env->ca) +#define xer_ov32 (env->ov) +#define xer_ca32 (env->ca) #define xer_cmp ((env->xer >> XER_CMP) & 0xFF) #define xer_bc ((env->xer >> XER_BC) & 0x7F) @@ -2325,18 +2346,9 @@ enum { /*****************************************************************************/ -static inline target_ulong cpu_read_xer(CPUPPCState *env) -{ - return env->xer | (env->so << XER_SO) | (env->ov << XER_OV) | (env->ca << XER_CA); -} - -static inline void cpu_write_xer(CPUPPCState *env, target_ulong xer) -{ - env->so = (xer >> XER_SO) & 1; - env->ov = (xer >> XER_OV) & 1; - env->ca = (xer >> XER_CA) & 1; - 
env->xer = xer & ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA)); -} +#define is_isa300(ctx) (!!(ctx->insns_flags2 & PPC2_ISA300)) +target_ulong cpu_read_xer(CPUPPCState *env); +void cpu_write_xer(CPUPPCState *env, target_ulong xer); static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 9f5cafd5ba..58aee640c3 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -1850,12 +1850,11 @@ void helper_xsaddqp(CPUPPCState *env, uint32_t opcode) getVSR(rD(opcode) + 32, &xt, env); helper_reset_fpstatus(env); + tstat = env->fp_status; if (unlikely(Rc(opcode) != 0)) { - /* TODO: Support xsadddpo after round-to-odd is implemented */ - abort(); + tstat.float_rounding_mode = float_round_to_odd; } - tstat = env->fp_status; set_float_exception_flags(0, &tstat); xt.f128 = float128_add(xa.f128, xb.f128, &tstat); env->fp_status.float_exception_flags |= tstat.float_exception_flags; @@ -1930,19 +1929,18 @@ VSX_MUL(xvmulsp, 4, float32, VsrW(i), 0, 0) void helper_xsmulqp(CPUPPCState *env, uint32_t opcode) { ppc_vsr_t xt, xa, xb; + float_status tstat; getVSR(rA(opcode) + 32, &xa, env); getVSR(rB(opcode) + 32, &xb, env); getVSR(rD(opcode) + 32, &xt, env); + helper_reset_fpstatus(env); + tstat = env->fp_status; if (unlikely(Rc(opcode) != 0)) { - /* TODO: Support xsmulpo after round-to-odd is implemented */ - abort(); + tstat.float_rounding_mode = float_round_to_odd; } - helper_reset_fpstatus(env); - - float_status tstat = env->fp_status; set_float_exception_flags(0, &tstat); xt.f128 = float128_mul(xa.f128, xb.f128, &tstat); env->fp_status.float_exception_flags |= tstat.float_exception_flags; @@ -2019,18 +2017,18 @@ VSX_DIV(xvdivsp, 4, float32, VsrW(i), 0, 0) void helper_xsdivqp(CPUPPCState *env, uint32_t opcode) { ppc_vsr_t xt, xa, xb; + float_status tstat; getVSR(rA(opcode) + 32, &xa, env); getVSR(rB(opcode) + 32, &xb, env); getVSR(rD(opcode) + 32, &xt, env); + helper_reset_fpstatus(env); + tstat = env->fp_status; if (unlikely(Rc(opcode) != 0)) { - /* TODO: Support xsdivqpo after round-to-odd is implemented */ - abort(); + tstat.float_rounding_mode = float_round_to_odd; } - helper_reset_fpstatus(env); - float_status tstat = env->fp_status; set_float_exception_flags(0, &tstat); xt.f128 = float128_div(xa.f128, xb.f128, &tstat); env->fp_status.float_exception_flags |= tstat.float_exception_flags; @@ -2679,6 +2677,99 @@ VSX_MAX_MIN(xsmindp, minnum, 1, float64, VsrD(0)) VSX_MAX_MIN(xvmindp, minnum, 2, float64, VsrD(i)) VSX_MAX_MIN(xvminsp, minnum, 4, float32, VsrW(i)) +#define VSX_MAX_MINC(name, max) \ +void helper_##name(CPUPPCState *env, uint32_t opcode) \ +{ \ + ppc_vsr_t xt, xa, xb; \ + bool vxsnan_flag = false, vex_flag = false; \ + \ + getVSR(rA(opcode) + 32, &xa, env); \ + getVSR(rB(opcode) + 32, &xb, env); \ + getVSR(rD(opcode) + 32, &xt, env); \ + \ + if (unlikely(float64_is_any_nan(xa.VsrD(0)) || \ + float64_is_any_nan(xb.VsrD(0)))) { \ + if (float64_is_signaling_nan(xa.VsrD(0), &env->fp_status) || \ + float64_is_signaling_nan(xb.VsrD(0), &env->fp_status)) { \ + vxsnan_flag = true; \ + } \ + xt.VsrD(0) = xb.VsrD(0); \ + } else if ((max && \ + !float64_lt(xa.VsrD(0), xb.VsrD(0), &env->fp_status)) || \ + (!max && \ + float64_lt(xa.VsrD(0), xb.VsrD(0), &env->fp_status))) { \ + xt.VsrD(0) = xa.VsrD(0); \ + } else { \ + xt.VsrD(0) = xb.VsrD(0); \ + } \ + \ + vex_flag = fpscr_ve & vxsnan_flag; \ + if (vxsnan_flag) { \ + float_invalid_op_excp(env, 
POWERPC_EXCP_FP_VXSNAN, 0); \ + } \ + if (!vex_flag) { \ + putVSR(rD(opcode) + 32, &xt, env); \ + } \ +} \ + +VSX_MAX_MINC(xsmaxcdp, 1); +VSX_MAX_MINC(xsmincdp, 0); + +#define VSX_MAX_MINJ(name, max) \ +void helper_##name(CPUPPCState *env, uint32_t opcode) \ +{ \ + ppc_vsr_t xt, xa, xb; \ + bool vxsnan_flag = false, vex_flag = false; \ + \ + getVSR(rA(opcode) + 32, &xa, env); \ + getVSR(rB(opcode) + 32, &xb, env); \ + getVSR(rD(opcode) + 32, &xt, env); \ + \ + if (unlikely(float64_is_any_nan(xa.VsrD(0)))) { \ + if (float64_is_signaling_nan(xa.VsrD(0), &env->fp_status)) { \ + vxsnan_flag = true; \ + } \ + xt.VsrD(0) = xa.VsrD(0); \ + } else if (unlikely(float64_is_any_nan(xb.VsrD(0)))) { \ + if (float64_is_signaling_nan(xb.VsrD(0), &env->fp_status)) { \ + vxsnan_flag = true; \ + } \ + xt.VsrD(0) = xb.VsrD(0); \ + } else if (float64_is_zero(xa.VsrD(0)) && float64_is_zero(xb.VsrD(0))) { \ + if (max) { \ + if (!float64_is_neg(xa.VsrD(0)) || !float64_is_neg(xb.VsrD(0))) { \ + xt.VsrD(0) = 0ULL; \ + } else { \ + xt.VsrD(0) = 0x8000000000000000ULL; \ + } \ + } else { \ + if (float64_is_neg(xa.VsrD(0)) || float64_is_neg(xb.VsrD(0))) { \ + xt.VsrD(0) = 0x8000000000000000ULL; \ + } else { \ + xt.VsrD(0) = 0ULL; \ + } \ + } \ + } else if ((max && \ + !float64_lt(xa.VsrD(0), xb.VsrD(0), &env->fp_status)) || \ + (!max && \ + float64_lt(xa.VsrD(0), xb.VsrD(0), &env->fp_status))) { \ + xt.VsrD(0) = xa.VsrD(0); \ + } else { \ + xt.VsrD(0) = xb.VsrD(0); \ + } \ + \ + vex_flag = fpscr_ve & vxsnan_flag; \ + if (vxsnan_flag) { \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + } \ + if (!vex_flag) { \ + putVSR(rD(opcode) + 32, &xt, env); \ + } \ +} \ + +VSX_MAX_MINJ(xsmaxjdp, 1); +VSX_MAX_MINJ(xsminjdp, 0); + /* VSX_CMP - VSX floating point compare * op - instruction mnemonic * nels - number of elements (1, 2 or 4) @@ -2861,18 +2952,20 @@ VSX_CVT_FP_TO_FP_HP(xvcvhpsp, 4, float16, float32, VsrH(2 * i + 1), VsrW(i), 0) void helper_xscvqpdp(CPUPPCState *env, uint32_t opcode) { ppc_vsr_t xt, xb; + float_status tstat; getVSR(rB(opcode) + 32, &xb, env); memset(&xt, 0, sizeof(xt)); + tstat = env->fp_status; if (unlikely(Rc(opcode) != 0)) { - /* TODO: Support xscvqpdpo after round-to-odd is implemented */ - abort(); + tstat.float_rounding_mode = float_round_to_odd; } - xt.VsrD(0) = float128_to_float64(xb.f128, &env->fp_status); + xt.VsrD(0) = float128_to_float64(xb.f128, &tstat); + env->fp_status.float_exception_flags |= tstat.float_exception_flags; if (unlikely(float128_is_signaling_nan(xb.f128, - &env->fp_status))) { + &tstat))) { float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); xt.VsrD(0) = float64_snan_to_qnan(xt.VsrD(0)); } @@ -2993,6 +3086,8 @@ VSX_CVT_FP_TO_INT_VECTOR(xscvqpsdz, float128, int64, f128, VsrD(0), \ VSX_CVT_FP_TO_INT_VECTOR(xscvqpswz, float128, int32, f128, VsrD(0), \ 0xffffffff80000000ULL) +VSX_CVT_FP_TO_INT_VECTOR(xscvqpudz, float128, uint64, f128, VsrD(0), 0x0ULL) +VSX_CVT_FP_TO_INT_VECTOR(xscvqpuwz, float128, uint32, f128, VsrD(0), 0x0ULL) /* VSX_CVT_INT_TO_FP - VSX integer to floating point conversion * op - instruction mnemonic @@ -3277,3 +3372,188 @@ void helper_xststdcsp(CPUPPCState *env, uint32_t opcode) env->fpscr |= cc << FPSCR_FPRF; env->crf[BF(opcode)] = cc; } + +void helper_xsrqpi(CPUPPCState *env, uint32_t opcode) +{ + ppc_vsr_t xb; + ppc_vsr_t xt; + uint8_t r = Rrm(opcode); + uint8_t ex = Rc(opcode); + uint8_t rmc = RMC(opcode); + uint8_t rmode = 0; + float_status tstat; + + getVSR(rB(opcode) + 32, &xb, env); + memset(&xt, 0, sizeof(xt)); + 
helper_reset_fpstatus(env); + + if (r == 0 && rmc == 0) { + rmode = float_round_ties_away; + } else if (r == 0 && rmc == 0x3) { + rmode = fpscr_rn; + } else if (r == 1) { + switch (rmc) { + case 0: + rmode = float_round_nearest_even; + break; + case 1: + rmode = float_round_to_zero; + break; + case 2: + rmode = float_round_up; + break; + case 3: + rmode = float_round_down; + break; + default: + abort(); + } + } + + tstat = env->fp_status; + set_float_exception_flags(0, &tstat); + set_float_rounding_mode(rmode, &tstat); + xt.f128 = float128_round_to_int(xb.f128, &tstat); + env->fp_status.float_exception_flags |= tstat.float_exception_flags; + + if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { + if (float128_is_signaling_nan(xb.f128, &tstat)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); + xt.f128 = float128_snan_to_qnan(xt.f128); + } + } + + if (ex == 0 && (tstat.float_exception_flags & float_flag_inexact)) { + env->fp_status.float_exception_flags &= ~float_flag_inexact; + } + + helper_compute_fprf_float128(env, xt.f128); + float_check_status(env); + putVSR(rD(opcode) + 32, &xt, env); +} + +void helper_xsrqpxp(CPUPPCState *env, uint32_t opcode) +{ + ppc_vsr_t xb; + ppc_vsr_t xt; + uint8_t r = Rrm(opcode); + uint8_t rmc = RMC(opcode); + uint8_t rmode = 0; + floatx80 round_res; + float_status tstat; + + getVSR(rB(opcode) + 32, &xb, env); + memset(&xt, 0, sizeof(xt)); + helper_reset_fpstatus(env); + + if (r == 0 && rmc == 0) { + rmode = float_round_ties_away; + } else if (r == 0 && rmc == 0x3) { + rmode = fpscr_rn; + } else if (r == 1) { + switch (rmc) { + case 0: + rmode = float_round_nearest_even; + break; + case 1: + rmode = float_round_to_zero; + break; + case 2: + rmode = float_round_up; + break; + case 3: + rmode = float_round_down; + break; + default: + abort(); + } + } + + tstat = env->fp_status; + set_float_exception_flags(0, &tstat); + set_float_rounding_mode(rmode, &tstat); + round_res = float128_to_floatx80(xb.f128, &tstat); + xt.f128 = floatx80_to_float128(round_res, &tstat); + env->fp_status.float_exception_flags |= tstat.float_exception_flags; + + if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { + if (float128_is_signaling_nan(xb.f128, &tstat)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); + xt.f128 = float128_snan_to_qnan(xt.f128); + } + } + + helper_compute_fprf_float128(env, xt.f128); + putVSR(rD(opcode) + 32, &xt, env); + float_check_status(env); +} + +void helper_xssqrtqp(CPUPPCState *env, uint32_t opcode) +{ + ppc_vsr_t xb; + ppc_vsr_t xt; + float_status tstat; + + getVSR(rB(opcode) + 32, &xb, env); + memset(&xt, 0, sizeof(xt)); + helper_reset_fpstatus(env); + + tstat = env->fp_status; + if (unlikely(Rc(opcode) != 0)) { + tstat.float_rounding_mode = float_round_to_odd; + } + + set_float_exception_flags(0, &tstat); + xt.f128 = float128_sqrt(xb.f128, &tstat); + env->fp_status.float_exception_flags |= tstat.float_exception_flags; + + if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { + if (float128_is_signaling_nan(xb.f128, &tstat)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + xt.f128 = float128_snan_to_qnan(xb.f128); + } else if (float128_is_quiet_nan(xb.f128, &tstat)) { + xt.f128 = xb.f128; + } else if (float128_is_neg(xb.f128) && !float128_is_zero(xb.f128)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); + set_snan_bit_is_one(0, &env->fp_status); + xt.f128 = float128_default_nan(&env->fp_status); + } + } + + helper_compute_fprf_float128(env, xt.f128); + putVSR(rD(opcode) + 
32, &xt, env); + float_check_status(env); +} + +void helper_xssubqp(CPUPPCState *env, uint32_t opcode) +{ + ppc_vsr_t xt, xa, xb; + float_status tstat; + + getVSR(rA(opcode) + 32, &xa, env); + getVSR(rB(opcode) + 32, &xb, env); + getVSR(rD(opcode) + 32, &xt, env); + helper_reset_fpstatus(env); + + tstat = env->fp_status; + if (unlikely(Rc(opcode) != 0)) { + tstat.float_rounding_mode = float_round_to_odd; + } + + set_float_exception_flags(0, &tstat); + xt.f128 = float128_sub(xa.f128, xb.f128, &tstat); + env->fp_status.float_exception_flags |= tstat.float_exception_flags; + + if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { + if (float128_is_infinity(xa.f128) && float128_is_infinity(xb.f128)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + } else if (float128_is_signaling_nan(xa.f128, &tstat) || + float128_is_signaling_nan(xb.f128, &tstat)) { + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + } + } + + helper_compute_fprf_float128(env, xt.f128); + putVSR(rD(opcode) + 32, &xt, env); + float_check_status(env); +} diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 85af9df36d..6d77661f7c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -431,6 +431,10 @@ DEF_HELPER_2(xscmpoqp, void, env, i32) DEF_HELPER_2(xscmpuqp, void, env, i32) DEF_HELPER_2(xsmaxdp, void, env, i32) DEF_HELPER_2(xsmindp, void, env, i32) +DEF_HELPER_2(xsmaxcdp, void, env, i32) +DEF_HELPER_2(xsmincdp, void, env, i32) +DEF_HELPER_2(xsmaxjdp, void, env, i32) +DEF_HELPER_2(xsminjdp, void, env, i32) DEF_HELPER_2(xscvdphp, void, env, i32) DEF_HELPER_2(xscvdpqp, void, env, i32) DEF_HELPER_2(xscvdpsp, void, env, i32) @@ -438,6 +442,8 @@ DEF_HELPER_2(xscvdpspn, i64, env, i64) DEF_HELPER_2(xscvqpdp, void, env, i32) DEF_HELPER_2(xscvqpsdz, void, env, i32) DEF_HELPER_2(xscvqpswz, void, env, i32) +DEF_HELPER_2(xscvqpudz, void, env, i32) +DEF_HELPER_2(xscvqpuwz, void, env, i32) DEF_HELPER_2(xscvhpdp, void, env, i32) DEF_HELPER_2(xscvsdqp, void, env, i32) DEF_HELPER_2(xscvspdp, void, env, i32) @@ -459,6 +465,10 @@ DEF_HELPER_2(xsrdpic, void, env, i32) DEF_HELPER_2(xsrdpim, void, env, i32) DEF_HELPER_2(xsrdpip, void, env, i32) DEF_HELPER_2(xsrdpiz, void, env, i32) +DEF_HELPER_2(xsrqpi, void, env, i32) +DEF_HELPER_2(xsrqpxp, void, env, i32) +DEF_HELPER_2(xssqrtqp, void, env, i32) +DEF_HELPER_2(xssubqp, void, env, i32) DEF_HELPER_2(xsaddsp, void, env, i32) DEF_HELPER_2(xssubsp, void, env, i32) @@ -661,6 +671,7 @@ DEF_HELPER_2(load_slb_vsid, tl, env, tl) DEF_HELPER_2(find_slb_vsid, tl, env, tl) DEF_HELPER_FLAGS_1(slbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(slbie, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(slbieg, TCG_CALL_NO_RWG, void, env, tl) #endif DEF_HELPER_FLAGS_2(load_sr, TCG_CALL_NO_RWG, tl, env, tl) DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index dd0a8929b3..da4e1a62c9 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -28,6 +28,15 @@ /*****************************************************************************/ /* Fixed point operations helpers */ +static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) +{ + if (unlikely(ov)) { + env->so = env->ov = 1; + } else { + env->ov = 0; + } +} + target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { @@ -49,11 +58,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } 
else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return (target_ulong)rt; @@ -81,11 +86,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return (target_ulong)rt; @@ -105,11 +106,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) } if (oe) { - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return rt; @@ -127,12 +124,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) } if (oe) { - - if (unlikely(overflow)) { - env->so = env->ov = 1; - } else { - env->ov = 0; - } + helper_update_ov_legacy(env, overflow); } return rt; diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 5a2fd68427..1f441c6483 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -133,6 +133,8 @@ EXTRACT_HELPER(UIMM4, 16, 4); EXTRACT_HELPER(NB, 11, 5); /* Shift count */ EXTRACT_HELPER(SH, 11, 5); +/* lwat/stwat/ldat/lwat */ +EXTRACT_HELPER(FC, 11, 5); /* Vector shift count */ EXTRACT_HELPER(VSH, 6, 4); /* Mask start */ @@ -186,6 +188,7 @@ EXTRACT_HELPER(DCM, 10, 6) /* DFP Z23-form */ EXTRACT_HELPER(RMC, 9, 2) +EXTRACT_HELPER(Rrm, 16, 1) EXTRACT_HELPER_SPLIT(DQxT, 3, 1, 21, 5); EXTRACT_HELPER_SPLIT(xT, 0, 1, 21, 5); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 663d2e79c9..acc40ece65 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -438,12 +438,13 @@ static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift) return (1ul << shift) <= rampgsize; } +static long max_cpu_page_size; + static void kvm_fixup_page_sizes(PowerPCCPU *cpu) { static struct kvm_ppc_smmu_info smmu_info; static bool has_smmu_info; CPUPPCState *env = &cpu->env; - long rampagesize; int iq, ik, jq, jk; bool has_64k_pages = false; @@ -458,7 +459,9 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) has_smmu_info = true; } - rampagesize = getrampagesize(); + if (!max_cpu_page_size) { + max_cpu_page_size = getrampagesize(); + } /* Convert to QEMU form */ memset(&env->sps, 0, sizeof(env->sps)); @@ -478,14 +481,14 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq]; struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik]; - if (!kvm_valid_page_size(smmu_info.flags, rampagesize, + if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, ksps->page_shift)) { continue; } qsps->page_shift = ksps->page_shift; qsps->slb_enc = ksps->slb_enc; for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) { - if (!kvm_valid_page_size(smmu_info.flags, rampagesize, + if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size, ksps->enc[jk].page_shift)) { continue; } @@ -510,12 +513,33 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) env->mmu_model &= ~POWERPC_MMU_64K; } } + +bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) +{ + Object *mem_obj = object_resolve_path(obj_path, NULL); + char *mempath = object_property_get_str(mem_obj, "mem-path", NULL); + long pagesize; + + if (mempath) { + pagesize = gethugepagesize(mempath); + } else { + pagesize = getpagesize(); + } + + return pagesize >= max_cpu_page_size; +} + #else /* defined (TARGET_PPC64) */ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu) { } +bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) +{ + return true; +} + #endif /* !defined 
(TARGET_PPC64) */ unsigned long kvm_arch_vcpu_id(CPUState *cpu) @@ -1227,7 +1251,7 @@ static int kvmppc_get_books_sregs(PowerPCCPU *cpu) return ret; } - if (!env->external_htab) { + if (!cpu->vhyp) { ppc_store_sdr1(env, sregs.u.s.sdr1); } @@ -2572,89 +2596,85 @@ void kvm_arch_init_irq_routing(KVMState *s) { } -struct kvm_get_htab_buf { - struct kvm_get_htab_header header; - /* - * We require one extra byte for read - */ - target_ulong hpte[(HPTES_PER_GROUP * 2) + 1]; -}; - -uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index) +void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n) { - int htab_fd; - struct kvm_get_htab_fd ghf; - struct kvm_get_htab_buf *hpte_buf; + struct kvm_get_htab_fd ghf = { + .flags = 0, + .start_index = ptex, + }; + int fd, rc; + int i; - ghf.flags = 0; - ghf.start_index = pte_index; - htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (htab_fd < 0) { - goto error_out; + fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); + if (fd < 0) { + hw_error("kvmppc_read_hptes: Unable to open HPT fd"); } - hpte_buf = g_malloc0(sizeof(*hpte_buf)); - /* - * Read the hpte group - */ - if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) { - goto out_close; - } + i = 0; + while (i < n) { + struct kvm_get_htab_header *hdr; + int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP; + char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64]; - close(htab_fd); - return (uint64_t)(uintptr_t) hpte_buf->hpte; + rc = read(fd, buf, sizeof(buf)); + if (rc < 0) { + hw_error("kvmppc_read_hptes: Unable to read HPTEs"); + } -out_close: - g_free(hpte_buf); - close(htab_fd); -error_out: - return 0; -} + hdr = (struct kvm_get_htab_header *)buf; + while ((i < n) && ((char *)hdr < (buf + rc))) { + int invalid = hdr->n_invalid; -void kvmppc_hash64_free_pteg(uint64_t token) -{ - struct kvm_get_htab_buf *htab_buf; + if (hdr->index != (ptex + i)) { + hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32 + " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i); + } + + memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid); + i += hdr->n_valid; - htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf, - hpte); - g_free(htab_buf); - return; + if ((n - i) < invalid) { + invalid = n - i; + } + memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64); + i += hdr->n_invalid; + + hdr = (struct kvm_get_htab_header *) + ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid); + } + } + + close(fd); } -void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) { - int htab_fd; + int fd, rc; struct kvm_get_htab_fd ghf; - struct kvm_get_htab_buf hpte_buf; + struct { + struct kvm_get_htab_header hdr; + uint64_t pte0; + uint64_t pte1; + } buf; ghf.flags = 0; ghf.start_index = 0; /* Ignored */ - htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); - if (htab_fd < 0) { - goto error_out; - } - - hpte_buf.header.n_valid = 1; - hpte_buf.header.n_invalid = 0; - hpte_buf.header.index = pte_index; - hpte_buf.hpte[0] = pte0; - hpte_buf.hpte[1] = pte1; - /* - * Write the hpte entry. - * CAUTION: write() has the warn_unused_result attribute. Hence we - * need to check the return value, even though we do nothing. 
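The rewritten kvmppc_read_hptes() above consumes the KVM HPT file descriptor as a stream of variable-length records: a header gives a starting index, a count of valid entries (each HASH_PTE_SIZE_64 bytes, sent inline after the header) and a count of invalid entries that are only implied and must be zero-filled by the reader. A stripped-down parser for one such buffer follows; the header struct is abbreviated here, the real kvm_get_htab_header comes from the kernel headers, and the buffer is assumed to be well formed:

#include <stdint.h>
#include <string.h>

#define HASH_PTE_SIZE_64 16

/* Abbreviated stand-in for the kernel's kvm_get_htab_header; only the
 * fields used by the loop in kvmppc_read_hptes() are modelled. */
struct htab_hdr {
    uint32_t index;
    uint16_t n_valid;
    uint16_t n_invalid;
};

/* Fill hptes[0..n) for the PTEs starting at index ptex from one buffer
 * of records. Valid entries follow their header inline; invalid ones
 * are only counted, so the corresponding slots are zeroed. */
static void parse_hpt_records(const char *buf, size_t len,
                              uint64_t ptex, uint8_t *hptes, int n)
{
    const char *p = buf;
    int i = 0;

    (void)ptex; /* a stricter parser would check hdr.index == ptex + i */

    while (i < n && (size_t)(p - buf) + sizeof(struct htab_hdr) <= len) {
        struct htab_hdr hdr;
        int valid, invalid;

        memcpy(&hdr, p, sizeof(hdr));
        p += sizeof(hdr);

        valid = hdr.n_valid;
        if (valid > n - i) {
            valid = n - i;
        }
        memcpy(&hptes[(size_t)i * HASH_PTE_SIZE_64], p,
               (size_t)valid * HASH_PTE_SIZE_64);
        p += (size_t)hdr.n_valid * HASH_PTE_SIZE_64;
        i += valid;

        invalid = hdr.n_invalid;
        if (invalid > n - i) {
            invalid = n - i;
        }
        memset(&hptes[(size_t)i * HASH_PTE_SIZE_64], 0,
               (size_t)invalid * HASH_PTE_SIZE_64);
        i += invalid;
    }
}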
- */ - if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { - goto out_close; + fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); + if (fd < 0) { + hw_error("kvmppc_write_hpte: Unable to open HPT fd"); } -out_close: - close(htab_fd); - return; + buf.hdr.n_valid = 1; + buf.hdr.n_invalid = 0; + buf.hdr.index = ptex; + buf.pte0 = cpu_to_be64(pte0); + buf.pte1 = cpu_to_be64(pte1); -error_out: - return; + rc = write(fd, &buf, sizeof(buf)); + if (rc != sizeof(buf)) { + hw_error("kvmppc_write_hpte: Unable to update KVM HPT"); + } + close(fd); } int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 151c00bac7..8e9f42d0c6 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -49,17 +49,16 @@ int kvmppc_get_htab_fd(bool write); int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid); -uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index); -void kvmppc_hash64_free_pteg(uint64_t token); - -void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, - target_ulong pte0, target_ulong pte1); +void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n); +void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1); bool kvmppc_has_cap_fixup_hcalls(void); bool kvmppc_has_cap_htm(void); int kvmppc_enable_hwrng(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); +bool kvmppc_is_mem_backend_page_size_ok(char *obj_path); + #else static inline uint32_t kvmppc_get_tbfreq(void) @@ -192,6 +191,11 @@ static inline uint64_t kvmppc_rma_size(uint64_t current_size, return ram_size; } +static inline bool kvmppc_is_mem_backend_page_size_ok(char *obj_path) +{ + return true; +} + #endif /* !CONFIG_USER_ONLY */ static inline bool kvmppc_has_cap_epr(void) @@ -227,20 +231,13 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, abort(); } -static inline uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, - target_ulong pte_index) -{ - abort(); -} - -static inline void kvmppc_hash64_free_pteg(uint64_t token) +static inline void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) { abort(); } -static inline void kvmppc_hash64_write_pte(CPUPPCState *env, - target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +static inline void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1) { abort(); } diff --git a/target/ppc/machine.c b/target/ppc/machine.c index df9f7a4e05..6cb3a48db1 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -76,7 +76,7 @@ static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) qemu_get_betls(f, &env->pb[i]); for (i = 0; i < 1024; i++) qemu_get_betls(f, &env->spr[i]); - if (!env->external_htab) { + if (!cpu->vhyp) { ppc_store_sdr1(env, sdr1); } qemu_get_be32s(f, &env->vscr); @@ -228,8 +228,7 @@ static int cpu_post_load(void *opaque, int version_id) env->IBAT[1][i+4] = env->spr[SPR_IBAT4U + 2*i + 1]; } - if (!env->external_htab) { - /* Restore htab_base and htab_mask variables */ + if (!cpu->vhyp) { ppc_store_sdr1(env, env->spr[SPR_SDR1]); } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index ab432bafaf..fa573dd7d2 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -82,11 +82,9 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val) { PowerPCCPU *cpu = ppc_env_get_cpu(env); - if 
(!env->external_htab) { - if (env->spr[SPR_SDR1] != val) { - ppc_store_sdr1(env, val); - tlb_flush(CPU(cpu)); - } + if (env->spr[SPR_SDR1] != val) { + ppc_store_sdr1(env, val); + tlb_flush(CPU(cpu)); } } diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 29bace622a..03ae3c1279 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -304,9 +304,9 @@ static int ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash) { - CPUPPCState *env = &cpu->env; + target_ulong mask = ppc_hash32_hpt_mask(cpu); - return (hash * HASH_PTEG_SIZE_32) & env->htab_mask; + return (hash * HASH_PTEG_SIZE_32) & mask; } static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off, @@ -339,7 +339,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu, target_ulong sr, target_ulong eaddr, ppc_hash_pte32_t *pte) { - CPUPPCState *env = &cpu->env; hwaddr pteg_off, pte_offset; hwaddr hash; uint32_t vsid, pgidx, ptem; @@ -353,21 +352,22 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); /* Primary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=%" PRIx32 " ptem=%" PRIx32 " hash=" TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, vsid, ptem, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), + vsid, ptem, hash); pteg_off = get_pteg_offset32(cpu, hash); pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte); if (pte_offset == -1) { /* Secondary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=%" PRIx32 " api=%" PRIx32 - " hash=" TARGET_FMT_plx "\n", env->htab_base, - env->htab_mask, vsid, ptem, ~hash); + " hash=" TARGET_FMT_plx "\n", ppc_hash32_hpt_base(cpu), + ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash); pteg_off = get_pteg_offset32(cpu, ~hash); pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte); } diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 5b9fb08d1a..898021f0d8 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -44,6 +44,8 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, /* * Hash page table definitions */ +#define SDR_32_HTABORG 0xFFFF0000UL +#define SDR_32_HTABMASK 0x000001FFUL #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_32 8 @@ -65,42 +67,46 @@ int ppc_hash32_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, #define HPTE32_R_WIMG 0x00000078 #define HPTE32_R_PP 0x00000003 +static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu) +{ + return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG; +} + +static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu) +{ + return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0xFFFF; +} + static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu, hwaddr pte_offset) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - return ldl_phys(CPU(cpu)->as, env->htab_base + pte_offset); + return ldl_phys(CPU(cpu)->as, base + pte_offset); } static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu, hwaddr pte_offset) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ 
- return ldl_phys(CPU(cpu)->as, - env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2); + return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2); } static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu, hwaddr pte_offset, target_ulong pte0) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - stl_phys(CPU(cpu)->as, env->htab_base + pte_offset, pte0); + stl_phys(CPU(cpu)->as, base + pte_offset, pte0); } static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, hwaddr pte_offset, target_ulong pte1) { - CPUPPCState *env = &cpu->env; + target_ulong base = ppc_hash32_hpt_base(cpu); - assert(!env->external_htab); /* Not supported on 32-bit for now */ - stl_phys(CPU(cpu)->as, - env->htab_base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); + stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } typedef struct { diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index bb78fb5497..d44f2bb432 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -27,6 +27,7 @@ #include "kvm_ppc.h" #include "mmu-hash64.h" #include "exec/log.h" +#include "hw/hw.h" //#define DEBUG_SLB @@ -37,12 +38,6 @@ #endif /* - * Used to indicate that a CPU has its hash page table (HPT) managed - * within the host kernel - */ -#define MMU_HASH64_KVM_MANAGED_HPT ((void *)-1) - -/* * SLB handling */ @@ -115,7 +110,8 @@ void helper_slbia(CPUPPCState *env) } } -void helper_slbie(CPUPPCState *env, target_ulong addr) +static void __helper_slbie(CPUPPCState *env, target_ulong addr, + target_ulong global) { PowerPCCPU *cpu = ppc_env_get_cpu(env); ppc_slb_t *slb; @@ -132,10 +128,21 @@ void helper_slbie(CPUPPCState *env, target_ulong addr) * and we still don't have a tlb_flush_mask(env, n, mask) * in QEMU, we just invalidate all TLBs */ - env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; + env->tlb_need_flush |= + (global == false ? 
TLB_NEED_LOCAL_FLUSH : TLB_NEED_GLOBAL_FLUSH); } } +void helper_slbie(CPUPPCState *env, target_ulong addr) +{ + __helper_slbie(env, addr, false); +} + +void helper_slbieg(CPUPPCState *env, target_ulong addr) +{ + __helper_slbie(env, addr, true); +} + int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot, target_ulong esid, target_ulong vsid) { @@ -282,55 +289,6 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb) return rt; } -/* - * 64-bit hash table MMU handling - */ -void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - target_ulong htabsize = value & SDR_64_HTABSIZE; - - env->spr[SPR_SDR1] = value; - if (htabsize > 28) { - error_setg(errp, - "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1", - htabsize); - htabsize = 28; - } - env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1; - env->htab_base = value & SDR_64_HTABORG; -} - -void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, - Error **errp) -{ - CPUPPCState *env = &cpu->env; - Error *local_err = NULL; - - if (hpt) { - env->external_htab = hpt; - } else { - env->external_htab = MMU_HASH64_KVM_MANAGED_HPT; - } - ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18), - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - /* Not strictly necessary, but makes it clearer that an external - * htab is in use when debugging */ - env->htab_base = -1; - - if (kvm_enabled()) { - if (kvmppc_put_books_sregs(cpu) < 0) { - error_setg(errp, "Unable to update SDR1 in KVM"); - } - } -} - static int ppc_hash64_pte_prot(PowerPCCPU *cpu, ppc_slb_t *slb, ppc_hash_pte64_t pte) { @@ -419,34 +377,43 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, ppc_hash_pte64_t pte) return prot; } -uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index) +const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, + hwaddr ptex, int n) { - uint64_t token = 0; - hwaddr pte_offset; + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr plen = n * HASH_PTE_SIZE_64; + const ppc_hash_pte64_t *hptes; + + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + return vhc->map_hptes(cpu->vhyp, ptex, n); + } - pte_offset = pte_index * HASH_PTE_SIZE_64; - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - /* - * HTAB is controlled by KVM. Fetch the PTEG into a new buffer. - */ - token = kvmppc_hash64_read_pteg(cpu, pte_index); - } else if (cpu->env.external_htab) { - /* - * HTAB is controlled by QEMU. Just point to the internally - * accessible PTEG. 
- */ - token = (uint64_t)(uintptr_t) cpu->env.external_htab + pte_offset; - } else if (cpu->env.htab_base) { - token = cpu->env.htab_base + pte_offset; + if (!base) { + return NULL; } - return token; + + hptes = address_space_map(CPU(cpu)->as, base + pte_offset, &plen, false); + if (plen < (n * HASH_PTE_SIZE_64)) { + hw_error("%s: Unable to map all requested HPTEs\n", __func__); + } + return hptes; } -void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token) +void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) { - if (cpu->env.external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - kvmppc_hash64_free_pteg(token); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); + return; } + + address_space_unmap(CPU(cpu)->as, (void *)hptes, n * HASH_PTE_SIZE_64, + false, n * HASH_PTE_SIZE_64); } static unsigned hpte_page_shift(const struct ppc_one_seg_page_size *sps, @@ -491,20 +458,19 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, target_ulong ptem, ppc_hash_pte64_t *pte, unsigned *pshift) { - CPUPPCState *env = &cpu->env; int i; - uint64_t token; + const ppc_hash_pte64_t *pteg; target_ulong pte0, pte1; - target_ulong pte_index; + target_ulong ptex; - pte_index = (hash & env->htab_mask) * HPTES_PER_GROUP; - token = ppc_hash64_start_access(cpu, pte_index); - if (!token) { + ptex = (hash & ppc_hash64_hpt_mask(cpu)) * HPTES_PER_GROUP; + pteg = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + if (!pteg) { return -1; } for (i = 0; i < HPTES_PER_GROUP; i++) { - pte0 = ppc_hash64_load_hpte0(cpu, token, i); - pte1 = ppc_hash64_load_hpte1(cpu, token, i); + pte0 = ppc_hash64_hpte0(cpu, pteg, i); + pte1 = ppc_hash64_hpte1(cpu, pteg, i); /* This compares V, B, H (secondary) and the AVPN */ if (HPTE64_V_COMPARE(pte0, ptem)) { @@ -524,11 +490,11 @@ static hwaddr ppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, */ pte->pte0 = pte0; pte->pte1 = pte1; - ppc_hash64_stop_access(cpu, token); - return (pte_index + i) * HASH_PTE_SIZE_64; + ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP); + return ptex + i; } } - ppc_hash64_stop_access(cpu, token); + ppc_hash64_unmap_hptes(cpu, pteg, ptex, HPTES_PER_GROUP); /* * We didn't find a valid entry. 
*/ @@ -540,8 +506,7 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu, ppc_hash_pte64_t *pte, unsigned *pshift) { CPUPPCState *env = &cpu->env; - hwaddr pte_offset; - hwaddr hash; + hwaddr hash, ptex; uint64_t vsid, epnmask, epn, ptem; const struct ppc_one_seg_page_size *sps = slb->sps; @@ -576,29 +541,30 @@ static hwaddr ppc_hash64_htab_lookup(PowerPCCPU *cpu, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), hash); /* Primary PTEG lookup */ qemu_log_mask(CPU_LOG_MMU, "0 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=" TARGET_FMT_lx " ptem=" TARGET_FMT_lx " hash=" TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, vsid, ptem, hash); - pte_offset = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift); + ppc_hash64_hpt_base(cpu), ppc_hash64_hpt_mask(cpu), + vsid, ptem, hash); + ptex = ppc_hash64_pteg_search(cpu, hash, sps, ptem, pte, pshift); - if (pte_offset == -1) { + if (ptex == -1) { /* Secondary PTEG lookup */ ptem |= HPTE64_V_SECONDARY; qemu_log_mask(CPU_LOG_MMU, "1 htab=" TARGET_FMT_plx "/" TARGET_FMT_plx " vsid=" TARGET_FMT_lx " api=" TARGET_FMT_lx - " hash=" TARGET_FMT_plx "\n", env->htab_base, - env->htab_mask, vsid, ptem, ~hash); + " hash=" TARGET_FMT_plx "\n", ppc_hash64_hpt_base(cpu), + ppc_hash64_hpt_mask(cpu), vsid, ptem, ~hash); - pte_offset = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift); + ptex = ppc_hash64_pteg_search(cpu, ~hash, sps, ptem, pte, pshift); } - return pte_offset; + return ptex; } unsigned ppc_hash64_hpte_page_shift_noslb(PowerPCCPU *cpu, @@ -640,7 +606,15 @@ static void ppc_hash64_set_isi(CPUState *cs, CPUPPCState *env, if (msr_ir) { vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1); } else { - vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0); + switch (env->mmu_model) { + case POWERPC_MMU_3_00: + /* Field deprecated in ISAv3.00 - interrupts always go to hyperv */ + vpm = true; + break; + default: + vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0); + break; + } } if (vpm && !msr_hv) { cs->exception_index = POWERPC_EXCP_HISI; @@ -658,7 +632,15 @@ static void ppc_hash64_set_dsi(CPUState *cs, CPUPPCState *env, uint64_t dar, if (msr_dr) { vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM1); } else { - vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0); + switch (env->mmu_model) { + case POWERPC_MMU_3_00: + /* Field deprecated in ISAv3.00 - interrupts always go to hyperv */ + vpm = true; + break; + default: + vpm = !!(env->spr[SPR_LPCR] & LPCR_VPM0); + break; + } } if (vpm && !msr_hv) { cs->exception_index = POWERPC_EXCP_HDSI; @@ -680,7 +662,7 @@ int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr eaddr, CPUPPCState *env = &cpu->env; ppc_slb_t *slb; unsigned apshift; - hwaddr pte_offset; + hwaddr ptex; ppc_hash_pte64_t pte; int pp_prot, amr_prot, prot; uint64_t new_pte1, dsisr; @@ -764,8 +746,8 @@ skip_slb_search: } /* 4. Locate the PTE in the hash table */ - pte_offset = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift); - if (pte_offset == -1) { + ptex = ppc_hash64_htab_lookup(cpu, slb, eaddr, &pte, &apshift); + if (ptex == -1) { dsisr = 0x40000000; if (rwx == 2) { ppc_hash64_set_isi(cs, env, dsisr); @@ -778,7 +760,7 @@ skip_slb_search: return 1; } qemu_log_mask(CPU_LOG_MMU, - "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset); + "found PTE at index %08" HWADDR_PRIx "\n", ptex); /* 5. 
Check access permissions */ @@ -821,8 +803,7 @@ skip_slb_search: } if (new_pte1 != pte.pte1) { - ppc_hash64_store_hpte(cpu, pte_offset / HASH_PTE_SIZE_64, - pte.pte0, new_pte1); + ppc_hash64_store_hpte(cpu, ptex, pte.pte0, new_pte1); } /* 7. Determine the real address from the PTE */ @@ -839,7 +820,7 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) { CPUPPCState *env = &cpu->env; ppc_slb_t *slb; - hwaddr pte_offset, raddr; + hwaddr ptex, raddr; ppc_hash_pte64_t pte; unsigned apshift; @@ -872,8 +853,8 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) } } - pte_offset = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift); - if (pte_offset == -1) { + ptex = ppc_hash64_htab_lookup(cpu, slb, addr, &pte, &apshift); + if (ptex == -1) { return -1; } @@ -881,30 +862,24 @@ hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr) & TARGET_PAGE_MASK; } -void ppc_hash64_store_hpte(PowerPCCPU *cpu, - target_ulong pte_index, - target_ulong pte0, target_ulong pte1) +void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1) { - CPUPPCState *env = &cpu->env; + hwaddr base = ppc_hash64_hpt_base(cpu); + hwaddr offset = ptex * HASH_PTE_SIZE_64; - if (env->external_htab == MMU_HASH64_KVM_MANAGED_HPT) { - kvmppc_hash64_write_pte(env, pte_index, pte0, pte1); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + vhc->store_hpte(cpu->vhyp, ptex, pte0, pte1); return; } - pte_index *= HASH_PTE_SIZE_64; - if (env->external_htab) { - stq_p(env->external_htab + pte_index, pte0); - stq_p(env->external_htab + pte_index + HASH_PTE_SIZE_64 / 2, pte1); - } else { - stq_phys(CPU(cpu)->as, env->htab_base + pte_index, pte0); - stq_phys(CPU(cpu)->as, - env->htab_base + pte_index + HASH_PTE_SIZE_64 / 2, pte1); - } + stq_phys(CPU(cpu)->as, base + offset, pte0); + stq_phys(CPU(cpu)->as, base + offset + HASH_PTE_SIZE_64 / 2, pte1); } -void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, - target_ulong pte_index, +void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong ptex, target_ulong pte0, target_ulong pte1) { /* @@ -1050,6 +1025,14 @@ void helper_store_lpcr(CPUPPCState *env, target_ulong val) LPCR_P8_PECE2 | LPCR_P8_PECE3 | LPCR_P8_PECE4 | LPCR_MER | LPCR_TC | LPCR_LPES0 | LPCR_HDICE); break; + case POWERPC_MMU_3_00: /* P9 */ + lpcr = val & (LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD | + (LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL | + LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | + (LPCR_PECE_L_MASK & (LPCR_PDEE | LPCR_HDEE | LPCR_EEE | + LPCR_DEE | LPCR_OEE)) | LPCR_MER | LPCR_GTSE | LPCR_TC | + LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE | LPCR_HDICE); + break; default: ; } diff --git a/target/ppc/mmu-hash64.h b/target/ppc/mmu-hash64.h index 7a0b7fca41..54f1e37655 100644 --- a/target/ppc/mmu-hash64.h +++ b/target/ppc/mmu-hash64.h @@ -10,8 +10,8 @@ int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot, hwaddr ppc_hash64_get_phys_page_debug(PowerPCCPU *cpu, target_ulong addr); int ppc_hash64_handle_mmu_fault(PowerPCCPU *cpu, vaddr address, int rw, int mmu_idx); -void ppc_hash64_store_hpte(PowerPCCPU *cpu, target_ulong index, - target_ulong pte0, target_ulong pte1); +void ppc_hash64_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1); void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, target_ulong pte_index, target_ulong pte0, target_ulong pte1); @@ -56,6 +56,9 @@ void ppc_hash64_update_rmls(CPUPPCState *env); * Hash page table definitions */ +#define SDR_64_HTABORG 
0x0FFFFFFFFFFC0000ULL +#define SDR_64_HTABSIZE 0x000000000000001FULL + #define HPTES_PER_GROUP 8 #define HASH_PTE_SIZE_64 16 #define HASH_PTEG_SIZE_64 (HASH_PTE_SIZE_64 * HPTES_PER_GROUP) @@ -91,45 +94,41 @@ void ppc_hash64_update_rmls(CPUPPCState *env); #define HPTE64_V_1TB_SEG 0x4000000000000000ULL #define HPTE64_V_VRMA_MASK 0x4001ffffff000000ULL -void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value, - Error **errp); -void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift, - Error **errp); - -uint64_t ppc_hash64_start_access(PowerPCCPU *cpu, target_ulong pte_index); -void ppc_hash64_stop_access(PowerPCCPU *cpu, uint64_t token); - -static inline target_ulong ppc_hash64_load_hpte0(PowerPCCPU *cpu, - uint64_t token, int index) +static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu) { - CPUPPCState *env = &cpu->env; - uint64_t addr; - - addr = token + (index * HASH_PTE_SIZE_64); - if (env->external_htab) { - return ldq_p((const void *)(uintptr_t)addr); - } else { - return ldq_phys(CPU(cpu)->as, addr); - } + return cpu->env.spr[SPR_SDR1] & SDR_64_HTABORG; } -static inline target_ulong ppc_hash64_load_hpte1(PowerPCCPU *cpu, - uint64_t token, int index) +static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) { - CPUPPCState *env = &cpu->env; - uint64_t addr; - - addr = token + (index * HASH_PTE_SIZE_64) + HASH_PTE_SIZE_64/2; - if (env->external_htab) { - return ldq_p((const void *)(uintptr_t)addr); - } else { - return ldq_phys(CPU(cpu)->as, addr); + if (cpu->vhyp) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + return vhc->hpt_mask(cpu->vhyp); } + return (1ULL << ((cpu->env.spr[SPR_SDR1] & SDR_64_HTABSIZE) + 18 - 7)) - 1; } -typedef struct { +struct ppc_hash_pte64 { uint64_t pte0, pte1; -} ppc_hash_pte64_t; +}; + +const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, + hwaddr ptex, int n); +void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n); + +static inline uint64_t ppc_hash64_hpte0(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, int i) +{ + return ldq_p(&(hptes[i].pte0)); +} + +static inline uint64_t ppc_hash64_hpte1(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, int i) +{ + return ldq_p(&(hptes[i].pte1)); +} #endif /* CONFIG_USER_ONLY */ diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index f746f53615..a1af3d6bf2 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -28,6 +28,7 @@ #include "exec/cpu_ldst.h" #include "exec/log.h" #include "helper_regs.h" +#include "qemu/error-report.h" //#define DEBUG_MMU //#define DEBUG_BATS @@ -466,6 +467,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, int rw, int type) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); hwaddr hash; target_ulong vsid; int ds, pr, target_page_bits; @@ -503,7 +505,7 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, qemu_log_mask(CPU_LOG_MMU, "htab_base " TARGET_FMT_plx " htab_mask " TARGET_FMT_plx " hash " TARGET_FMT_plx "\n", - env->htab_base, env->htab_mask, hash); + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); ctx->hash[0] = hash; ctx->hash[1] = ~hash; @@ -518,9 +520,11 @@ static inline int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, uint32_t a0, a1, a2, a3; qemu_log("Page table: " TARGET_FMT_plx " len " TARGET_FMT_plx - "\n", env->htab_base, env->htab_mask + 0x80); - for (curaddr = env->htab_base; - curaddr < 
(env->htab_base + env->htab_mask + 0x80); + "\n", ppc_hash32_hpt_base(cpu), + ppc_hash32_hpt_mask(cpu) + 0x80); + for (curaddr = ppc_hash32_hpt_base(cpu); + curaddr < (ppc_hash32_hpt_base(cpu) + + ppc_hash32_hpt_mask(cpu) + 0x80); curaddr += 16) { a0 = ldl_phys(cs->as, curaddr); a1 = ldl_phys(cs->as, curaddr + 4); @@ -825,7 +829,7 @@ static int mmubooke_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, tlb = &env->tlb.tlbe[i]; ret = mmubooke_check_tlb(env, tlb, &raddr, &ctx->prot, address, rw, access_type, i); - if (!ret) { + if (ret != -1) { break; } } @@ -1205,12 +1209,13 @@ static void mmu6xx_dump_BATs(FILE *f, fprintf_function cpu_fprintf, static void mmu6xx_dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUPPCState *env) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); ppc6xx_tlb_t *tlb; target_ulong sr; int type, way, entry, i; - cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", env->htab_base); - cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", env->htab_mask); + cpu_fprintf(f, "HTAB base = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_base(cpu)); + cpu_fprintf(f, "HTAB mask = 0x%"HWADDR_PRIx"\n", ppc_hash32_hpt_mask(cpu)); cpu_fprintf(f, "\nSegment registers:\n"); for (i = 0; i < 32; i++) { @@ -1592,9 +1597,9 @@ static int cpu_ppc_handle_mmu_fault(CPUPPCState *env, target_ulong address, env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem; tlb_miss: env->error_code |= ctx.key << 19; - env->spr[SPR_HASH1] = env->htab_base + + env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + get_pteg_offset32(cpu, ctx.hash[0]); - env->spr[SPR_HASH2] = env->htab_base + + env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + get_pteg_offset32(cpu, ctx.hash[1]); break; case POWERPC_MMU_SOFT_74xx: @@ -1935,6 +1940,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: case POWERPC_MMU_2_07a: + case POWERPC_MMU_3_00: #endif /* defined(TARGET_PPC64) */ env->tlb_need_flush = 0; tlb_flush(CPU(cpu)); @@ -1974,6 +1980,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: case POWERPC_MMU_2_07a: + case POWERPC_MMU_3_00: /* tlbie invalidate TLBs for all segments */ /* XXX: given the fact that there are too many segments to invalidate, * and we still don't have a tlb_flush_mask(env, n, mask) in QEMU, @@ -1995,26 +2002,28 @@ /* Special registers manipulation */ void ppc_store_sdr1(CPUPPCState *env, target_ulong value) { + PowerPCCPU *cpu = ppc_env_get_cpu(env); qemu_log_mask(CPU_LOG_MMU, "%s: " TARGET_FMT_lx "\n", __func__, value); - assert(!env->external_htab); - env->spr[SPR_SDR1] = value; + assert(!cpu->vhyp); #if defined(TARGET_PPC64) if (env->mmu_model & POWERPC_MMU_64) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); - Error *local_err = NULL; + target_ulong sdr_mask = SDR_64_HTABORG | SDR_64_HTABSIZE; + target_ulong htabsize = value & SDR_64_HTABSIZE; - ppc_hash64_set_sdr1(cpu, value, &local_err); - if (local_err) { - error_report_err(local_err); - error_free(local_err); + if (value & ~sdr_mask) { + error_report("Invalid bits 0x"TARGET_FMT_lx" set in SDR1", + value & ~sdr_mask); + value &= sdr_mask; + } + if (htabsize > 28) { + error_report("Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1", + htabsize); + return; } - } else -#endif /* defined(TARGET_PPC64) */ - { - /* FIXME: Should check for valid HTABMASK values */ - env->htab_mask = ((value & SDR_32_HTABMASK) << 16) | 0xFFFF; - env->htab_base = value & SDR_32_HTABORG; } +#endif /* defined(TARGET_PPC64) */ + /*
FIXME: Should check for valid HTABMASK values in 32-bit case */ + env->spr[SPR_SDR1] = value; } /* Segment registers load and store */ diff --git a/target/ppc/monitor.c b/target/ppc/monitor.c index c2d0806dd1..b8f30e9eaf 100644 --- a/target/ppc/monitor.c +++ b/target/ppc/monitor.c @@ -62,6 +62,10 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) { CPUArchState *env1 = mon_get_cpu_env(); + if (!env1) { + monitor_printf(mon, "No CPU available\n"); + return; + } dump_mmu((FILE*)mon, (fprintf_function)monitor_printf, env1); } diff --git a/target/ppc/translate.c b/target/ppc/translate.c index b48abaedfb..6e6868b7a0 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -71,7 +71,7 @@ static TCGv cpu_lr; #if defined(TARGET_PPC64) static TCGv cpu_cfar; #endif -static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca; +static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -173,6 +173,10 @@ void ppc_translate_init(void) offsetof(CPUPPCState, ov), "OV"); cpu_ca = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, ca), "CA"); + cpu_ov32 = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, ov32), "OV32"); + cpu_ca32 = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, ca32), "CA32"); cpu_reserve = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_addr), @@ -806,12 +810,40 @@ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, } tcg_temp_free(t0); if (NARROW_MODE(ctx)) { - tcg_gen_ext32s_tl(cpu_ov, cpu_ov); + tcg_gen_extract_tl(cpu_ov, cpu_ov, 31, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + } else { + if (is_isa300(ctx)) { + tcg_gen_extract_tl(cpu_ov32, cpu_ov, 31, 1); + } + tcg_gen_extract_tl(cpu_ov, cpu_ov, 63, 1); } - tcg_gen_shri_tl(cpu_ov, cpu_ov, TARGET_LONG_BITS - 1); tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } +static inline void gen_op_arith_compute_ca32(DisasContext *ctx, + TCGv res, TCGv arg0, TCGv arg1, + int sub) +{ + TCGv t0; + + if (!is_isa300(ctx)) { + return; + } + + t0 = tcg_temp_new(); + if (sub) { + tcg_gen_eqv_tl(t0, arg0, arg1); + } else { + tcg_gen_xor_tl(t0, arg0, arg1); + } + tcg_gen_xor_tl(t0, t0, res); + tcg_gen_extract_tl(cpu_ca32, t0, 32, 1); + tcg_temp_free(t0); +} + /* Common add function */ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, bool add_ca, bool compute_ca, @@ -838,6 +870,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_temp_free(t1); tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */ tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ca32, cpu_ca); + } } else { TCGv zero = tcg_const_tl(0); if (add_ca) { @@ -846,6 +881,7 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } else { tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero); } + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0); tcg_temp_free(zero); } } else { @@ -985,6 +1021,9 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, } if (compute_ov) { tcg_gen_extu_i32_tl(cpu_ov, t2); + if (is_isa300(ctx)) { + tcg_gen_extu_i32_tl(cpu_ov32, t2); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } tcg_temp_free_i32(t0); @@ -1056,6 +1095,9 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, } if (compute_ov) { tcg_gen_mov_tl(cpu_ov, t2); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, t2); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } tcg_temp_free_i64(t0); @@ -1074,10 
+1116,10 @@ static void glue(gen_, name)(DisasContext *ctx) cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ sign, compute_ov); \ } -/* divwu divwu. divwuo divwuo. */ +/* divdu divdu. divduo divduo. */ GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0); GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1); -/* divw divw. divwo divwo. */ +/* divd divd. divdo divdo. */ GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0); GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1); @@ -1249,6 +1291,9 @@ static void gen_mullwo(DisasContext *ctx) tcg_gen_sari_i32(t0, t0, 31); tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); tcg_gen_extu_i32_tl(cpu_ov, t0); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); tcg_temp_free_i32(t0); @@ -1310,6 +1355,9 @@ static void gen_mulldo(DisasContext *ctx) tcg_gen_sari_i64(t0, t0, 63); tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); tcg_temp_free_i64(t0); @@ -1353,17 +1401,22 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_temp_free(t1); tcg_gen_shri_tl(cpu_ca, cpu_ca, 32); /* extract bit 32 */ tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ca32, cpu_ca); + } } else if (add_ca) { TCGv zero, inv1 = tcg_temp_new(); tcg_gen_not_tl(inv1, arg1); zero = tcg_const_tl(0); tcg_gen_add2_tl(t0, cpu_ca, arg2, zero, cpu_ca, zero); tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, inv1, zero); + gen_op_arith_compute_ca32(ctx, t0, inv1, arg2, 0); tcg_temp_free(zero); tcg_temp_free(inv1); } else { tcg_gen_setcond_tl(TCG_COND_GEU, cpu_ca, arg2, arg1); tcg_gen_sub_tl(t0, arg2, arg1); + gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 1); } } else if (add_ca) { /* Since we're ignoring carry-out, we can simplify the @@ -1442,7 +1495,10 @@ static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) static void gen_neg(DisasContext *ctx) { - gen_op_arith_neg(ctx, 0); + tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode))) { + gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); + } } static void gen_nego(DisasContext *ctx) @@ -2976,6 +3032,113 @@ LARX(lbarx, DEF_MEMOP(MO_UB)) LARX(lharx, DEF_MEMOP(MO_UW)) LARX(lwarx, DEF_MEMOP(MO_UL)) +#define LD_ATOMIC(name, memop, tp, op, eop) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + int len = MEMOP_GET_SIZE(memop); \ + uint32_t gpr_FC = FC(ctx->opcode); \ + TCGv EA = tcg_temp_local_new(); \ + TCGv_##tp t0, t1; \ + \ + gen_addr_register(ctx, EA); \ + if (len > 1) { \ + gen_check_align(ctx, EA, len - 1); \ + } \ + t0 = tcg_temp_new_##tp(); \ + t1 = tcg_temp_new_##tp(); \ + tcg_gen_##op(t0, cpu_gpr[rD(ctx->opcode) + 1]); \ + \ + switch (gpr_FC) { \ + case 0: /* Fetch and add */ \ + tcg_gen_atomic_fetch_add_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 1: /* Fetch and xor */ \ + tcg_gen_atomic_fetch_xor_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 2: /* Fetch and or */ \ + tcg_gen_atomic_fetch_or_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 3: /* Fetch and 'and' */ \ + tcg_gen_atomic_fetch_and_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 8: /* Swap */ \ + tcg_gen_atomic_xchg_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 4: /* Fetch and max unsigned */ \ + case 5: /* Fetch and max signed */ \ + case 6: /* Fetch and min unsigned */ \ + case 7: /* Fetch and min signed */ \ + case 16: /* compare and swap not equal */ \ + case 24: /* Fetch and increment bounded */ \ + case 
25: /* Fetch and increment equal */ \ + case 28: /* Fetch and decrement bounded */ \ + gen_invalid(ctx); \ + break; \ + default: \ + /* invoke data storage error handler */ \ + gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL); \ + } \ + tcg_gen_##eop(cpu_gpr[rD(ctx->opcode)], t1); \ + tcg_temp_free_##tp(t0); \ + tcg_temp_free_##tp(t1); \ + tcg_temp_free(EA); \ +} + +LD_ATOMIC(lwat, DEF_MEMOP(MO_UL), i32, trunc_tl_i32, extu_i32_tl) +#if defined(TARGET_PPC64) +LD_ATOMIC(ldat, DEF_MEMOP(MO_Q), i64, mov_i64, mov_i64) +#endif + +#define ST_ATOMIC(name, memop, tp, op) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + int len = MEMOP_GET_SIZE(memop); \ + uint32_t gpr_FC = FC(ctx->opcode); \ + TCGv EA = tcg_temp_local_new(); \ + TCGv_##tp t0, t1; \ + \ + gen_addr_register(ctx, EA); \ + if (len > 1) { \ + gen_check_align(ctx, EA, len - 1); \ + } \ + t0 = tcg_temp_new_##tp(); \ + t1 = tcg_temp_new_##tp(); \ + tcg_gen_##op(t0, cpu_gpr[rD(ctx->opcode) + 1]); \ + \ + switch (gpr_FC) { \ + case 0: /* add and Store */ \ + tcg_gen_atomic_add_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 1: /* xor and Store */ \ + tcg_gen_atomic_xor_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 2: /* Or and Store */ \ + tcg_gen_atomic_or_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 3: /* 'and' and Store */ \ + tcg_gen_atomic_and_fetch_##tp(t1, EA, t0, ctx->mem_idx, memop); \ + break; \ + case 4: /* Store max unsigned */ \ + case 5: /* Store max signed */ \ + case 6: /* Store min unsigned */ \ + case 7: /* Store min signed */ \ + case 24: /* Store twin */ \ + gen_invalid(ctx); \ + break; \ + default: \ + /* invoke data storage error handler */ \ + gen_exception_err(ctx, POWERPC_EXCP_DSI, POWERPC_EXCP_INVAL); \ + } \ + tcg_temp_free_##tp(t0); \ + tcg_temp_free_##tp(t1); \ + tcg_temp_free(EA); \ +} + +ST_ATOMIC(stwat, DEF_MEMOP(MO_UL), i32, trunc_tl_i32) +#if defined(TARGET_PPC64) +ST_ATOMIC(stdat, DEF_MEMOP(MO_Q), i64, mov_i64) +#endif + #if defined(CONFIG_USER_ONLY) static void gen_conditional_store(DisasContext *ctx, TCGv EA, int reg, int memop) @@ -3596,7 +3759,7 @@ static void gen_tdi(DisasContext *ctx) /*** Processor control ***/ -static void gen_read_xer(TCGv dst) +static void gen_read_xer(DisasContext *ctx, TCGv dst) { TCGv t0 = tcg_temp_new(); TCGv t1 = tcg_temp_new(); @@ -3608,6 +3771,12 @@ static void gen_read_xer(TCGv dst) tcg_gen_or_tl(t0, t0, t1); tcg_gen_or_tl(dst, dst, t2); tcg_gen_or_tl(dst, dst, t0); + if (is_isa300(ctx)) { + tcg_gen_shli_tl(t0, cpu_ov32, XER_OV32); + tcg_gen_or_tl(dst, dst, t0); + tcg_gen_shli_tl(t0, cpu_ca32, XER_CA32); + tcg_gen_or_tl(dst, dst, t0); + } tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); @@ -3615,14 +3784,16 @@ static void gen_read_xer(TCGv dst) static void gen_write_xer(TCGv src) { + /* Write all flags, while reading back check for isa300 */ tcg_gen_andi_tl(cpu_xer, src, - ~((1u << XER_SO) | (1u << XER_OV) | (1u << XER_CA))); - tcg_gen_shri_tl(cpu_so, src, XER_SO); - tcg_gen_shri_tl(cpu_ov, src, XER_OV); - tcg_gen_shri_tl(cpu_ca, src, XER_CA); - tcg_gen_andi_tl(cpu_so, cpu_so, 1); - tcg_gen_andi_tl(cpu_ov, cpu_ov, 1); - tcg_gen_andi_tl(cpu_ca, cpu_ca, 1); + ~((1u << XER_SO) | + (1u << XER_OV) | (1u << XER_OV32) | + (1u << XER_CA) | (1u << XER_CA32))); + tcg_gen_extract_tl(cpu_ov32, src, XER_OV32, 1); + tcg_gen_extract_tl(cpu_ca32, src, XER_CA32, 1); + tcg_gen_extract_tl(cpu_so, src, XER_SO, 1); + tcg_gen_extract_tl(cpu_ov, src, XER_OV, 1); + tcg_gen_extract_tl(cpu_ca, src, XER_CA, 1); } /* 
mcrxr */ @@ -3648,6 +3819,28 @@ static void gen_mcrxr(DisasContext *ctx) tcg_gen_movi_tl(cpu_ca, 0); } +#ifdef TARGET_PPC64 +/* mcrxrx */ +static void gen_mcrxrx(DisasContext *ctx) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)]; + + /* copy OV and OV32 */ + tcg_gen_shli_tl(t0, cpu_ov, 1); + tcg_gen_or_tl(t0, t0, cpu_ov32); + tcg_gen_shli_tl(t0, t0, 2); + /* copy CA and CA32 */ + tcg_gen_shli_tl(t1, cpu_ca, 1); + tcg_gen_or_tl(t1, t1, cpu_ca32); + tcg_gen_or_tl(t0, t0, t1); + tcg_gen_trunc_tl_i32(dst, t0); + tcg_temp_free(t0); + tcg_temp_free(t1); +} +#endif + /* mfcr mfocrf */ static void gen_mfcr(DisasContext *ctx) { @@ -4377,6 +4570,30 @@ static void gen_slbie(DisasContext *ctx) gen_helper_slbie(cpu_env, cpu_gpr[rB(ctx->opcode)]); #endif /* defined(CONFIG_USER_ONLY) */ } + +/* slbieg */ +static void gen_slbieg(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + + gen_helper_slbieg(cpu_env, cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ +} + +/* slbsync */ +static void gen_slbsync(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_check_tlb_flush(ctx, true); +#endif /* defined(CONFIG_USER_ONLY) */ +} + #endif /* defined(TARGET_PPC64) */ /*** External control ***/ @@ -6025,6 +6242,19 @@ static inline void gen_cp_abort(DisasContext *ctx) // Do Nothing } +#define GEN_CP_PASTE_NOOP(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + /* Generate invalid exception until \ + * we have an implementation of the copy \ + * paste facility \ + */ \ + gen_invalid(ctx); \ +} + +GEN_CP_PASTE_NOOP(copy) +GEN_CP_PASTE_NOOP(paste) + static void gen_tcheck(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6174,7 +6404,9 @@ GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER), GEN_HANDLER_E(cnttzw, 0x1F, 0x1A, 0x10, 0x00000000, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(copy, 0x1F, 0x06, 0x18, 0x03C00001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cp_abort, 0x1F, 0x06, 0x1A, 0x03FFF801, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(paste, 0x1F, 0x06, 0x1C, 0x03C00000, PPC_NONE, PPC2_ISA300), GEN_HANDLER(or, 0x1F, 0x1C, 0x0D, 0x00000000, PPC_INTEGER), GEN_HANDLER(xor, 0x1F, 0x1C, 0x09, 0x00000000, PPC_INTEGER), GEN_HANDLER(ori, 0x18, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), @@ -6230,10 +6462,14 @@ GEN_HANDLER(isync, 0x13, 0x16, 0x04, 0x03FFF801, PPC_MEM), GEN_HANDLER_E(lbarx, 0x1F, 0x14, 0x01, 0, PPC_NONE, PPC2_ATOMIC_ISA206), GEN_HANDLER_E(lharx, 0x1F, 0x14, 0x03, 0, PPC_NONE, PPC2_ATOMIC_ISA206), GEN_HANDLER(lwarx, 0x1F, 0x14, 0x00, 0x00000000, PPC_RES), +GEN_HANDLER_E(lwat, 0x1F, 0x06, 0x12, 0x00000001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(stwat, 0x1F, 0x06, 0x16, 0x00000001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(stbcx_, 0x1F, 0x16, 0x15, 0, PPC_NONE, PPC2_ATOMIC_ISA206), GEN_HANDLER_E(sthcx_, 0x1F, 0x16, 0x16, 0, PPC_NONE, PPC2_ATOMIC_ISA206), GEN_HANDLER2(stwcx_, "stwcx.", 0x1F, 0x16, 0x04, 0x00000000, PPC_RES), #if defined(TARGET_PPC64) +GEN_HANDLER_E(ldat, 0x1F, 0x06, 0x13, 0x00000001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(stdat, 0x1F, 0x06, 0x17, 0x00000001, PPC_NONE, PPC2_ISA300), GEN_HANDLER(ldarx, 0x1F, 0x14, 0x02, 0x00000000, PPC_64B), GEN_HANDLER_E(lqarx, 0x1F, 0x14, 0x08, 0, PPC_NONE, PPC2_LSQ_ISA207), GEN_HANDLER2(stdcx_, "stdcx.", 0x1F, 0x16, 0x06, 0x00000000, PPC_64B), @@ -6241,6 +6477,7 
@@ GEN_HANDLER_E(stqcx_, 0x1F, 0x16, 0x05, 0, PPC_NONE, PPC2_LSQ_ISA207), #endif GEN_HANDLER(sync, 0x1F, 0x16, 0x12, 0x039FF801, PPC_MEM_SYNC), GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x03FFF801, PPC_WAIT), +GEN_HANDLER_E(wait, 0x1F, 0x1E, 0x00, 0x039FF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW), GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x00000000, PPC_FLOW), GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x00000000, PPC_FLOW), @@ -6273,6 +6510,7 @@ GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC), #if defined(TARGET_PPC64) GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B), GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(mcrxrx, 0x1F, 0x00, 0x12, 0x007FF801, PPC_NONE, PPC2_ISA300), #endif GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC), GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC), @@ -6313,6 +6551,8 @@ GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC), #if defined(TARGET_PPC64) GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI), GEN_HANDLER(slbie, 0x1F, 0x12, 0x0D, 0x03FF0001, PPC_SLBI), +GEN_HANDLER_E(slbieg, 0x1F, 0x12, 0x0E, 0x001F0001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(slbsync, 0x1F, 0x12, 0x0A, 0x03FFF801, PPC_NONE, PPC2_ISA300), #endif GEN_HANDLER(eciwx, 0x1F, 0x16, 0x0D, 0x00000001, PPC_EXTERN), GEN_HANDLER(ecowx, 0x1F, 0x16, 0x09, 0x00000001, PPC_EXTERN), diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index a44c0034a8..7f12908029 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -808,6 +808,10 @@ GEN_VSX_HELPER_2(xscmpoqp, 0x04, 0x04, 0, PPC2_VSX) GEN_VSX_HELPER_2(xscmpuqp, 0x04, 0x14, 0, PPC2_VSX) GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX) GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmaxcdp, 0x00, 0x10, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xsmincdp, 0x00, 0x11, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xsmaxjdp, 0x00, 0x12, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xsminjdp, 0x00, 0x12, 0, PPC2_ISA300) GEN_VSX_HELPER_2(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300) GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX) GEN_VSX_HELPER_2(xscvdpqp, 0x04, 0x1A, 0x16, PPC2_ISA300) @@ -815,6 +819,8 @@ GEN_VSX_HELPER_XT_XB_ENV(xscvdpspn, 0x16, 0x10, 0, PPC2_VSX207) GEN_VSX_HELPER_2(xscvqpdp, 0x04, 0x1A, 0x14, PPC2_ISA300) GEN_VSX_HELPER_2(xscvqpsdz, 0x04, 0x1A, 0x19, PPC2_ISA300) GEN_VSX_HELPER_2(xscvqpswz, 0x04, 0x1A, 0x09, PPC2_ISA300) +GEN_VSX_HELPER_2(xscvqpudz, 0x04, 0x1A, 0x11, PPC2_ISA300) +GEN_VSX_HELPER_2(xscvqpuwz, 0x04, 0x1A, 0x01, PPC2_ISA300) GEN_VSX_HELPER_2(xscvhpdp, 0x16, 0x15, 0x10, PPC2_ISA300) GEN_VSX_HELPER_2(xscvsdqp, 0x04, 0x1A, 0x0A, PPC2_ISA300) GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX) @@ -833,6 +839,11 @@ GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX) GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX) GEN_VSX_HELPER_XT_XB_ENV(xsrsp, 0x12, 0x11, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsrqpi, 0x05, 0x00, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xsrqpxp, 0x05, 0x01, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xssqrtqp, 0x04, 0x19, 0x1B, PPC2_ISA300) +GEN_VSX_HELPER_2(xssubqp, 0x04, 0x10, 0, PPC2_ISA300) + GEN_VSX_HELPER_2(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207) GEN_VSX_HELPER_2(xssubsp, 0x00, 0x01, 0, PPC2_VSX207) GEN_VSX_HELPER_2(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207) diff --git a/target/ppc/translate/vsx-ops.inc.c b/target/ppc/translate/vsx-ops.inc.c index 7dc9f6f477..5030c4aceb 100644 --- a/target/ppc/translate/vsx-ops.inc.c +++ 
b/target/ppc/translate/vsx-ops.inc.c @@ -103,6 +103,21 @@ GEN_HANDLER_E(name, 0x3F, opc2, opc3, inval, PPC_NONE, PPC2_ISA300) #define GEN_VSX_XFORM_300_EO(name, opc2, opc3, opc4, inval) \ GEN_HANDLER_E_2(name, 0x3F, opc2, opc3, opc4, inval, PPC_NONE, PPC2_ISA300) +#define GEN_VSX_Z23FORM_300(name, opc2, opc3, opc4, inval) \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x00, opc4 | 0x0, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x08, opc4 | 0x0, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x10, opc4 | 0x0, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x18, opc4 | 0x0, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x00, opc4 | 0x1, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x08, opc4 | 0x1, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x10, opc4 | 0x1, inval), \ +GEN_VSX_XFORM_300_EO(name, opc2, opc3 | 0x18, opc4 | 0x1, inval) + +GEN_VSX_Z23FORM_300(xsrqpi, 0x05, 0x0, 0x0, 0x0), +GEN_VSX_Z23FORM_300(xsrqpxp, 0x05, 0x1, 0x0, 0x0), +GEN_VSX_XFORM_300_EO(xssqrtqp, 0x04, 0x19, 0x1B, 0x0), +GEN_VSX_XFORM_300(xssubqp, 0x04, 0x10, 0x0), + GEN_XX2FORM(xsabsdp, 0x12, 0x15, PPC2_VSX), GEN_XX2FORM(xsnabsdp, 0x12, 0x16, PPC2_VSX), GEN_XX2FORM(xsnegdp, 0x12, 0x17, PPC2_VSX), @@ -116,6 +131,8 @@ GEN_VSX_XFORM_300_EO(xscvdpqp, 0x04, 0x1A, 0x16, 0x00000001), GEN_VSX_XFORM_300_EO(xscvqpdp, 0x04, 0x1A, 0x14, 0x0), GEN_VSX_XFORM_300_EO(xscvqpsdz, 0x04, 0x1A, 0x19, 0x00000001), GEN_VSX_XFORM_300_EO(xscvqpswz, 0x04, 0x1A, 0x09, 0x00000001), +GEN_VSX_XFORM_300_EO(xscvqpudz, 0x04, 0x1A, 0x11, 0x00000001), +GEN_VSX_XFORM_300_EO(xscvqpuwz, 0x04, 0x1A, 0x01, 0x00000001), #ifdef TARGET_PPC64 GEN_XX2FORM_EO(xsxexpdp, 0x16, 0x15, 0x00, PPC2_ISA300), @@ -185,6 +202,10 @@ GEN_VSX_XFORM_300(xscmpoqp, 0x04, 0x04, 0x00600001), GEN_VSX_XFORM_300(xscmpuqp, 0x04, 0x14, 0x00600001), GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX), GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX), +GEN_XX3FORM(xsmaxcdp, 0x00, 0x10, PPC2_ISA300), +GEN_XX3FORM(xsmincdp, 0x00, 0x11, PPC2_ISA300), +GEN_XX3FORM(xsmaxjdp, 0x00, 0x12, PPC2_ISA300), +GEN_XX3FORM(xsminjdp, 0x00, 0x13, PPC2_ISA300), GEN_XX2FORM_EO(xscvdphp, 0x16, 0x15, 0x11, PPC2_ISA300), GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX), GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207), diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c index 76f79fa77b..37f74be984 100644 --- a/target/ppc/translate_init.c +++ b/target/ppc/translate_init.c @@ -107,7 +107,7 @@ static void spr_access_nop(DisasContext *ctx, int sprn, int gprn) /* XER */ static void spr_read_xer (DisasContext *ctx, int gprn, int sprn) { - gen_read_xer(cpu_gpr[gprn]); + gen_read_xer(ctx, cpu_gpr[gprn]); } static void spr_write_xer (DisasContext *ctx, int sprn, int gprn) @@ -740,10 +740,22 @@ static void gen_spr_ne_601 (CPUPPCState *env) &spr_read_decr, &spr_write_decr, 0x00000000); /* Memory management */ - spr_register(env, SPR_SDR1, "SDR1", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_sdr1, - 0x00000000); +#ifndef CONFIG_USER_ONLY + if (env->has_hv_mode) { + /* SDR1 is a hypervisor resource on CPUs which have a + * hypervisor mode */ + spr_register_hv(env, SPR_SDR1, "SDR1", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sdr1, + 0x00000000); + } else { + spr_register(env, SPR_SDR1, "SDR1", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sdr1, + 0x00000000); + } +#endif } /* BATs 0-3 */ @@ -8816,8 +8828,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) (1ull << MSR_PMM) | (1ull << MSR_RI) | (1ull << 
MSR_LE); - /* Using 2.07 defines until new radix model is added. */ - pcc->mmu_model = POWERPC_MMU_2_07; + pcc->mmu_model = POWERPC_MMU_3_00; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; /* segment page size remain the same */ @@ -8836,18 +8847,14 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) } #if !defined(CONFIG_USER_ONLY) - -void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) -{ - cpu->vhyp = vhyp; -} - -void cpu_ppc_set_papr(PowerPCCPU *cpu) +void cpu_ppc_set_papr(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) { CPUPPCState *env = &cpu->env; ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR]; ppc_spr_t *amor = &env->spr_cb[SPR_AMOR]; + cpu->vhyp = vhyp; + /* PAPR always has exception vectors in RAM not ROM. To ensure this, * MSR[IP] should never be set. * @@ -8871,12 +8878,24 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu) lpcr->default_value &= ~LPCR_RMLS; lpcr->default_value |= 1ull << LPCR_RMLS_SHIFT; - /* P7 and P8 has slightly different PECE bits, mostly because P8 adds - * bit 47 and 48 which are reserved on P7. Here we set them all, which - * will work as expected for both implementations - */ - lpcr->default_value |= LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 | - LPCR_P8_PECE3 | LPCR_P8_PECE4; + switch (env->mmu_model) { + case POWERPC_MMU_3_00: + /* By default we choose legacy mode and switch to new hash or radix + * when a register process table hcall is made. So disable process + * tables and guest translation shootdown by default + */ + lpcr->default_value &= ~(LPCR_UPRT | LPCR_GTSE); + lpcr->default_value |= LPCR_PDEE | LPCR_HDEE | LPCR_EEE | LPCR_DEE | + LPCR_OEE; + break; + default: + /* P7 and P8 has slightly different PECE bits, mostly because P8 adds + * bit 47 and 48 which are reserved on P7. Here we set them all, which + * will work as expected for both implementations + */ + lpcr->default_value |= LPCR_P8_PECE0 | LPCR_P8_PECE1 | LPCR_P8_PECE2 | + LPCR_P8_PECE3 | LPCR_P8_PECE4; + } /* We should be followed by a CPU reset but update the active value * just in case... 
@@ -10478,11 +10497,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) #else cc->get_phys_page_debug = ppc_cpu_get_phys_page_debug; cc->vmsd = &vmstate_ppc_cpu; -#if defined(TARGET_PPC64) - cc->write_elf64_note = ppc64_cpu_write_elf64_note; -#endif #endif cc->cpu_exec_enter = ppc_cpu_exec_enter; +#if defined(CONFIG_SOFTMMU) + cc->write_elf64_note = ppc64_cpu_write_elf64_note; + cc->write_elf32_note = ppc32_cpu_write_elf32_note; +#endif cc->gdb_num_core_regs = 71; diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index 4731869f6b..105ae9a5d8 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -59,8 +59,7 @@ typedef struct S390xElfVregsHiStruct S390xElfVregsHi; typedef struct noteStruct { Elf64_Nhdr hdr; - char name[5]; - char pad3[3]; + char name[8]; union { S390xElfPrstatus prstatus; S390xElfFpregset fpregset; @@ -74,7 +73,7 @@ typedef struct noteStruct { } contents; } QEMU_PACKED Note; -static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu) +static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) { int i; S390xUserRegs *regs; @@ -88,9 +87,10 @@ static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu) regs->acrs[i] = cpu_to_be32(cpu->env.aregs[i]); regs->gprs[i] = cpu_to_be64(cpu->env.regs[i]); } + note->contents.prstatus.pid = id; } -static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu) +static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id) { int i; CPUS390XState *cs = &cpu->env; @@ -102,7 +102,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu) } } -static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu) +static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu, int id) { int i; @@ -112,7 +112,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu) } } -static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu) +static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id) { int i; S390xElfVregsHi *temp_vregshi; @@ -126,25 +126,25 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu) } } -static void s390x_write_elf64_timer(Note *note, S390CPU *cpu) +static void s390x_write_elf64_timer(Note *note, S390CPU *cpu, int id) { note->hdr.n_type = cpu_to_be32(NT_S390_TIMER); note->contents.timer = cpu_to_be64((uint64_t)(cpu->env.cputm)); } -static void s390x_write_elf64_todcmp(Note *note, S390CPU *cpu) +static void s390x_write_elf64_todcmp(Note *note, S390CPU *cpu, int id) { note->hdr.n_type = cpu_to_be32(NT_S390_TODCMP); note->contents.todcmp = cpu_to_be64((uint64_t)(cpu->env.ckc)); } -static void s390x_write_elf64_todpreg(Note *note, S390CPU *cpu) +static void s390x_write_elf64_todpreg(Note *note, S390CPU *cpu, int id) { note->hdr.n_type = cpu_to_be32(NT_S390_TODPREG); note->contents.todpreg = cpu_to_be32((uint32_t)(cpu->env.todpr)); } -static void s390x_write_elf64_ctrs(Note *note, S390CPU *cpu) +static void s390x_write_elf64_ctrs(Note *note, S390CPU *cpu, int id) { int i; @@ -155,20 +155,26 @@ static void s390x_write_elf64_ctrs(Note *note, S390CPU *cpu) } } -static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu) +static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id) { note->hdr.n_type = cpu_to_be32(NT_S390_PREFIX); note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa)); } -static const struct NoteFuncDescStruct { +typedef struct NoteFuncDescStruct { int contents_size; - void (*note_contents_func)(Note *note, S390CPU *cpu); -} note_func[] = { + void (*note_contents_func)(Note 
*note, S390CPU *cpu, int id); +} NoteFuncDesc; + +static const NoteFuncDesc note_core[] = { {sizeof(((Note *)0)->contents.prstatus), s390x_write_elf64_prstatus}, - {sizeof(((Note *)0)->contents.prefix), s390x_write_elf64_prefix}, {sizeof(((Note *)0)->contents.fpregset), s390x_write_elf64_fpregset}, + { 0, NULL} +}; + +static const NoteFuncDesc note_linux[] = { + {sizeof(((Note *)0)->contents.prefix), s390x_write_elf64_prefix}, {sizeof(((Note *)0)->contents.ctrs), s390x_write_elf64_ctrs}, {sizeof(((Note *)0)->contents.timer), s390x_write_elf64_timer}, {sizeof(((Note *)0)->contents.todcmp), s390x_write_elf64_todcmp}, @@ -178,25 +184,23 @@ static const struct NoteFuncDescStruct { { 0, NULL} }; -typedef struct NoteFuncDescStruct NoteFuncDesc; - - -static int s390x_write_all_elf64_notes(const char *note_name, +static int s390x_write_elf64_notes(const char *note_name, WriteCoreDumpFunction f, S390CPU *cpu, int id, - void *opaque) + void *opaque, + const NoteFuncDesc *funcs) { Note note; const NoteFuncDesc *nf; int note_size; int ret = -1; - for (nf = note_func; nf->note_contents_func; nf++) { + for (nf = funcs; nf->note_contents_func; nf++) { memset(¬e, 0, sizeof(note)); - note.hdr.n_namesz = cpu_to_be32(sizeof(note.name)); + note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); note.hdr.n_descsz = cpu_to_be32(nf->contents_size); strncpy(note.name, note_name, sizeof(note.name)); - (*nf->note_contents_func)(¬e, cpu); + (*nf->note_contents_func)(¬e, cpu, id); note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; ret = f(¬e, note_size, opaque); @@ -215,7 +219,13 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, int cpuid, void *opaque) { S390CPU *cpu = S390_CPU(cs); - return s390x_write_all_elf64_notes("CORE", f, cpu, cpuid, opaque); + int r; + + r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core); + if (r) { + return r; + } + return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux); } int cpu_get_dump_info(ArchDumpInfo *info, @@ -230,7 +240,7 @@ int cpu_get_dump_info(ArchDumpInfo *info, ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) { - int name_size = 8; /* "CORE" or "QEMU" rounded */ + int name_size = 8; /* "LINUX" or "CORE" + pad */ size_t elf_note_size = 0; int note_head_size; const NoteFuncDesc *nf; @@ -240,7 +250,11 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) note_head_size = sizeof(Elf64_Nhdr); - for (nf = note_func; nf->note_contents_func; nf++) { + for (nf = note_core; nf->note_contents_func; nf++) { + elf_note_size = elf_note_size + note_head_size + name_size + + nf->contents_size; + } + for (nf = note_linux; nf->note_contents_func; nf++) { elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size; } diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 5b66d3325d..2a894eec65 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -671,7 +671,7 @@ static S390CPUModel *get_max_cpu_model(Error **errp) if (kvm_enabled()) { kvm_s390_get_host_cpu_model(&max_model, errp); } else { - /* TCG enulates a z900 */ + /* TCG emulates a z900 */ max_model.def = &s390_cpu_defs[0]; bitmap_copy(max_model.features, max_model.def->default_feat, S390_FEAT_MAX); diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index 25367807f4..5ec050cf89 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -1867,6 +1867,40 @@ static void unmanageable_intercept(S390CPU *cpu, const char *str, int pswoffset) qemu_system_guest_panicked(NULL); } +/* 
try to detect pgm check loops */ +static int handle_oper_loop(S390CPU *cpu, struct kvm_run *run) +{ + CPUState *cs = CPU(cpu); + PSW oldpsw, newpsw; + + cpu_synchronize_state(cs); + newpsw.mask = ldq_phys(cs->as, cpu->env.psa + + offsetof(LowCore, program_new_psw)); + newpsw.addr = ldq_phys(cs->as, cpu->env.psa + + offsetof(LowCore, program_new_psw) + 8); + oldpsw.mask = run->psw_mask; + oldpsw.addr = run->psw_addr; + /* + * Avoid endless loops of operation exceptions, if the pgm new + * PSW will cause a new operation exception. + * The heuristic checks if the pgm new psw is within 6 bytes before + * the faulting psw address (with same DAT, AS settings) and the + * new psw is not a wait psw and the fault was not triggered by + * problem state. In that case go into crashed state. + */ + + if (oldpsw.addr - newpsw.addr <= 6 && + !(newpsw.mask & PSW_MASK_WAIT) && + !(oldpsw.mask & PSW_MASK_PSTATE) && + (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) && + (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT)) { + unmanageable_intercept(cpu, "operation exception loop", + offsetof(LowCore, program_new_psw)); + return EXCP_HALTED; + } + return 0; +} + static int handle_intercept(S390CPU *cpu) { CPUState *cs = CPU(cpu); @@ -1914,11 +1948,14 @@ static int handle_intercept(S390CPU *cpu) r = EXCP_HALTED; break; case ICPT_OPEREXC: - /* currently only instr 0x0000 after enabled via capability */ + /* check for break points */ r = handle_sw_breakpoint(cpu, run); if (r == -ENOENT) { - enter_pgmcheck(cpu, PGM_OPERATION); - r = 0; + /* Then check for potential pgm check loops */ + r = handle_oper_loop(cpu, run); + if (r == 0) { + enter_pgmcheck(cpu, PGM_OPERATION); + } } break; case ICPT_SOFT_INTERCEPT: diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index c9604ea9c7..3cb942e8bb 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -25,6 +25,7 @@ #include "exec/helper-proto.h" #include "sysemu/kvm.h" #include "qemu/timer.h" +#include "qemu/main-loop.h" #include "exec/address-spaces.h" #ifdef CONFIG_KVM #include <linux/kvm.h> @@ -109,11 +110,13 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen) /* SCLP service call */ uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2) { + qemu_mutex_lock_iothread(); int r = sclp_service_call(env, r1, r2); if (r < 0) { program_interrupt(env, -r, 4); - return 0; + r = 0; } + qemu_mutex_unlock_iothread(); return r; } diff --git a/target/sh4/monitor.c b/target/sh4/monitor.c index 426e5d4914..4c7f36c9cc 100644 --- a/target/sh4/monitor.c +++ b/target/sh4/monitor.c @@ -44,6 +44,11 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) CPUArchState *env = mon_get_cpu_env(); int i; + if (!env) { + monitor_printf(mon, "No CPU available\n"); + return; + } + monitor_printf (mon, "ITLB:\n"); for (i = 0 ; i < ITLB_SIZE ; i++) print_tlb (mon, i, &env->itlb[i]); diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index 2c05d6af75..57968d9143 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -1768,13 +1768,15 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, case 1: env->dmmu.mmu_primary_context = val; env->immu.mmu_primary_context = val; - tlb_flush_by_mmuidx(CPU(cpu), MMU_USER_IDX, MMU_KERNEL_IDX, -1); + tlb_flush_by_mmuidx(CPU(cpu), + (1 << MMU_USER_IDX) | (1 << MMU_KERNEL_IDX)); break; case 2: env->dmmu.mmu_secondary_context = val; env->immu.mmu_secondary_context = val; - tlb_flush_by_mmuidx(CPU(cpu), 
MMU_USER_SECONDARY_IDX, - MMU_KERNEL_SECONDARY_IDX, -1); + tlb_flush_by_mmuidx(CPU(cpu), + (1 << MMU_USER_SECONDARY_IDX) | + (1 << MMU_KERNEL_SECONDARY_IDX)); break; default: cpu_unassigned_access(cs, addr, true, false, 1, size); diff --git a/target/sparc/monitor.c b/target/sparc/monitor.c index 7cc1b0f87f..f3ca524ae9 100644 --- a/target/sparc/monitor.c +++ b/target/sparc/monitor.c @@ -32,6 +32,10 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) { CPUArchState *env1 = mon_get_cpu_env(); + if (!env1) { + monitor_printf(mon, "No CPU available\n"); + return; + } dump_mmu((FILE*)mon, (fprintf_function)monitor_printf, env1); } diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 655060cd9a..aa6734d54e 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -2448,8 +2448,31 @@ static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn) gen_ldstub(dc, dst, addr, da.mem_idx); break; default: - /* ??? Should be DAE_invalid_asi. */ - gen_exception(dc, TT_DATA_ACCESS); + /* ??? In theory, this should be raise DAE_invalid_asi. + But the SS-20 roms do ldstuba [%l0] #ASI_M_CTL, %o1. */ + if (parallel_cpus) { + gen_helper_exit_atomic(cpu_env); + } else { + TCGv_i32 r_asi = tcg_const_i32(da.asi); + TCGv_i32 r_mop = tcg_const_i32(MO_UB); + TCGv_i64 s64, t64; + + save_state(dc); + t64 = tcg_temp_new_i64(); + gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop); + + s64 = tcg_const_i64(0xff); + gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop); + tcg_temp_free_i64(s64); + tcg_temp_free_i32(r_mop); + tcg_temp_free_i32(r_asi); + + tcg_gen_trunc_i64_tl(dst, t64); + tcg_temp_free_i64(t64); + + /* End the TB. */ + dc->npc = DYNAMIC_PC; + } break; } } diff --git a/target/xtensa/monitor.c b/target/xtensa/monitor.c index f3fa4cd278..2ee2b5b23e 100644 --- a/target/xtensa/monitor.c +++ b/target/xtensa/monitor.c @@ -31,5 +31,9 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) { CPUArchState *env1 = mon_get_cpu_env(); + if (!env1) { + monitor_printf(mon, "No CPU available\n"); + return; + } dump_mmu((FILE*)mon, (fprintf_function)monitor_printf, env1); } diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 6d227a5a6a..290de6dae6 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -866,7 +866,7 @@ static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) } } -static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a, +static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a, TCGArg b, bool b_const, TCGLabel *l) { intptr_t offset; @@ -937,7 +937,7 @@ static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd, } } -static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, +static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl, TCGReg rh, TCGReg al, TCGReg ah, tcg_target_long bl, tcg_target_long bh, bool const_bl, bool const_bh, bool sub) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 21d96ec35c..4275787db9 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -165,4 +165,15 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop) { } +/* This defines the natural memory order supported by this + * architecture before guarantees made by various barrier + * instructions. + * + * The x86 has a pretty strong memory ordering which only really + * allows for some stores to be re-ordered after loads. 
+ */ +#include "tcg-mo.h" + +#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) + #endif diff --git a/tcg/tcg-mo.h b/tcg/tcg-mo.h new file mode 100644 index 0000000000..c2c55704e1 --- /dev/null +++ b/tcg/tcg-mo.h @@ -0,0 +1,48 @@ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_MO_H +#define TCG_MO_H + +typedef enum { + /* Used to indicate the type of accesses on which ordering + is to be ensured. Modeled after SPARC barriers. + + This is of the form TCG_MO_A_B where A is before B in program order. + */ + TCG_MO_LD_LD = 0x01, + TCG_MO_ST_LD = 0x02, + TCG_MO_LD_ST = 0x04, + TCG_MO_ST_ST = 0x08, + TCG_MO_ALL = 0x0F, /* OR of the above */ + + /* Used to indicate the kind of ordering which is to be ensured by the + instruction. These types are derived from x86/aarch64 instructions. + It should be noted that these are different from C11 semantics. */ + TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */ + TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */ + TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */ +} TCGBar; + +#endif /* TCG_MO_H */ @@ -29,6 +29,7 @@ #include "cpu.h" #include "exec/tb-context.h" #include "qemu/bitops.h" +#include "tcg-mo.h" #include "tcg-target.h" /* XXX: make safe guess about sizes */ @@ -79,6 +80,15 @@ typedef uint64_t tcg_target_ulong; #error unsupported #endif +/* Oversized TCG guests make things like MTTCG hard + * as we can't use atomics for cputlb updates. + */ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS +#define TCG_OVERSIZED_GUEST 1 +#else +#define TCG_OVERSIZED_GUEST 0 +#endif + #if TCG_TARGET_NB_REGS <= 32 typedef uint32_t TCGRegSet; #elif TCG_TARGET_NB_REGS <= 64 @@ -498,23 +508,6 @@ static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t) #define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) #define TCG_CALL_DUMMY_ARG ((TCGArg)(-1)) -typedef enum { - /* Used to indicate the type of accesses on which ordering - is to be ensured. Modeled after SPARC barriers. */ - TCG_MO_LD_LD = 0x01, - TCG_MO_ST_LD = 0x02, - TCG_MO_LD_ST = 0x04, - TCG_MO_ST_ST = 0x08, - TCG_MO_ALL = 0x0F, /* OR of the above */ - - /* Used to indicate the kind of ordering which is to be ensured by the - instruction. These types are derived from x86/aarch64 instructions. - It should be noted that these are different from C11 semantics. 
*/ - TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */ - TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */ - TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */ -} TCGBar; - /* Conditions. Note that these are laid out for easy manipulation by the functions below: bit 0 is used for inverting; diff --git a/tests/Makefile.include b/tests/Makefile.include index e60bb6ce58..364ef1bd23 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -308,8 +308,7 @@ check-qtest-sparc-y = tests/prom-env-test$(EXESUF) check-qtest-sparc64-y = tests/endianness-test$(EXESUF) #check-qtest-sparc64-y += tests/m48t59-test$(EXESUF) #gcov-files-sparc64-y += hw/timer/m48t59.c -#Disabled for now, triggers a TCG bug on 32-bit hosts -#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) +check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) check-qtest-arm-y = tests/tmp105-test$(EXESUF) check-qtest-arm-y += tests/ds1338-test$(EXESUF) @@ -670,7 +669,7 @@ tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ - tests/boot-sector.o $(libqos-obj-y) + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y) diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT Binary files differindex d11567c3dc..0dccad439b 100644 --- a/tests/acpi-test-data/q35/DSDT +++ b/tests/acpi-test-data/q35/DSDT diff --git a/tests/acpi-test-data/q35/DSDT.bridge b/tests/acpi-test-data/q35/DSDT.bridge Binary files differindex 412a6e9104..8cd66c3b31 100644 --- a/tests/acpi-test-data/q35/DSDT.bridge +++ b/tests/acpi-test-data/q35/DSDT.bridge diff --git a/tests/acpi-test-data/q35/DSDT.cphp b/tests/acpi-test-data/q35/DSDT.cphp Binary files differindex 79902d0d30..3c28a17a69 100644 --- a/tests/acpi-test-data/q35/DSDT.cphp +++ b/tests/acpi-test-data/q35/DSDT.cphp diff --git a/tests/acpi-test-data/q35/DSDT.ipmibt b/tests/acpi-test-data/q35/DSDT.ipmibt Binary files differindex b658329c5b..3ceb876127 100644 --- a/tests/acpi-test-data/q35/DSDT.ipmibt +++ b/tests/acpi-test-data/q35/DSDT.ipmibt diff --git a/tests/acpi-test-data/q35/DSDT.memhp b/tests/acpi-test-data/q35/DSDT.memhp Binary files differindex e46c1fb5a2..bdbefd47a5 100644 --- a/tests/acpi-test-data/q35/DSDT.memhp +++ b/tests/acpi-test-data/q35/DSDT.memhp diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c new file mode 100644 index 0000000000..41dc1ea9b4 --- /dev/null +++ b/tests/acpi-utils.c @@ -0,0 +1,65 @@ +/* + * ACPI Utility Functions + * + * Copyright (c) 2013 Red Hat Inc. + * Copyright (c) 2017 Skyport Systems + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com>, + * Ben Warren <ben@skyportsystems.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include <glib/gstdio.h> +#include "qemu-common.h" +#include "hw/smbios/smbios.h" +#include "qemu/bitmap.h" +#include "acpi-utils.h" +#include "boot-sector.h" + +uint8_t acpi_calc_checksum(const uint8_t *data, int len) +{ + int i; + uint8_t sum = 0; + + for (i = 0; i < len; i++) { + sum += data[i]; + } + + return sum; +} + +uint32_t acpi_find_rsdp_address(void) +{ + uint32_t off; + + /* RSDP location can vary across a narrow range */ + for (off = 0xf0000; off < 0x100000; off += 0x10) { + uint8_t sig[] = "RSD PTR "; + int i; + + for (i = 0; i < sizeof sig - 1; ++i) { + sig[i] = readb(off + i); + } + + if (!memcmp(sig, "RSD PTR ", sizeof sig)) { + break; + } + } + return off; +} + +void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table) +{ + ACPI_READ_FIELD(rsdp_table->signature, addr); + ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); + + ACPI_READ_FIELD(rsdp_table->checksum, addr); + ACPI_READ_ARRAY(rsdp_table->oem_id, addr); + ACPI_READ_FIELD(rsdp_table->revision, addr); + ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr); + ACPI_READ_FIELD(rsdp_table->length, addr); +} diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h new file mode 100644 index 0000000000..9f9a2d532c --- /dev/null +++ b/tests/acpi-utils.h @@ -0,0 +1,94 @@ +/* + * Utilities for working with ACPI tables + * + * Copyright (c) 2013 Red Hat Inc. + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com>, + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TEST_ACPI_UTILS_H +#define TEST_ACPI_UTILS_H + +#include "hw/acpi/acpi-defs.h" +#include "libqtest.h" + +/* DSDT and SSDTs format */ +typedef struct { + AcpiTableHeader header; + gchar *aml; /* aml bytecode from guest */ + gsize aml_len; + gchar *aml_file; + gchar *asl; /* asl code generated from aml */ + gsize asl_len; + gchar *asl_file; + bool tmp_files_retain; /* do not delete the temp asl/aml */ +} QEMU_PACKED AcpiSdtTable; + +#define ACPI_READ_FIELD(field, addr) \ + do { \ + switch (sizeof(field)) { \ + case 1: \ + field = readb(addr); \ + break; \ + case 2: \ + field = readw(addr); \ + break; \ + case 4: \ + field = readl(addr); \ + break; \ + case 8: \ + field = readq(addr); \ + break; \ + default: \ + g_assert(false); \ + } \ + addr += sizeof(field); \ + } while (0); + +#define ACPI_READ_ARRAY_PTR(arr, length, addr) \ + do { \ + int idx; \ + for (idx = 0; idx < length; ++idx) { \ + ACPI_READ_FIELD(arr[idx], addr); \ + } \ + } while (0); + +#define ACPI_READ_ARRAY(arr, addr) \ + ACPI_READ_ARRAY_PTR(arr, sizeof(arr) / sizeof(arr[0]), addr) + +#define ACPI_READ_TABLE_HEADER(table, addr) \ + do { \ + ACPI_READ_FIELD((table)->signature, addr); \ + ACPI_READ_FIELD((table)->length, addr); \ + ACPI_READ_FIELD((table)->revision, addr); \ + ACPI_READ_FIELD((table)->checksum, addr); \ + ACPI_READ_ARRAY((table)->oem_id, addr); \ + ACPI_READ_ARRAY((table)->oem_table_id, addr); \ + ACPI_READ_FIELD((table)->oem_revision, addr); \ + ACPI_READ_ARRAY((table)->asl_compiler_id, addr); \ + ACPI_READ_FIELD((table)->asl_compiler_revision, addr); \ + } while (0); + +#define ACPI_ASSERT_CMP(actual, expected) do { \ + uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \ + char ACPI_ASSERT_CMP_str[5] = {}; \ + memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \ + g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ +} while (0) + +#define ACPI_ASSERT_CMP64(actual, expected) do { \ + uint64_t ACPI_ASSERT_CMP_le = 
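/*
 * [Editorial illustration, not part of the patch.]
 * acpi_calc_checksum() above returns the byte-wise sum of a region;
 * ACPI tables are laid out so that this sum, taken over the whole table
 * including the checksum byte, is 0 modulo 256 -- hence the
 * g_assert(!acpi_calc_checksum(...)) calls in the tests.  Minimal
 * standalone sketch of that property:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t sum_bytes(const uint8_t *data, int len)
{
    uint8_t sum = 0;
    for (int i = 0; i < len; i++) {
        sum += data[i];
    }
    return sum;
}

int main(void)
{
    uint8_t table[16];

    memset(table, 0xAB, sizeof(table));   /* fake table contents          */
    table[9] = 0;                         /* zero the checksum slot first */
    table[9] = (uint8_t)(0x100 - sum_bytes(table, sizeof(table)));

    printf("sum over table = %u (expect 0)\n",
           sum_bytes(table, sizeof(table)));
    return 0;
}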
cpu_to_le64(actual); \ + char ACPI_ASSERT_CMP_str[9] = {}; \ + memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \ + g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ +} while (0) + +uint8_t acpi_calc_checksum(const uint8_t *data, int len); +uint32_t acpi_find_rsdp_address(void); +void acpi_parse_rsdp_table(uint32_t addr, AcpiRsdpDescriptor *rsdp_table); + +#endif /* TEST_ACPI_UTILS_H */ diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 54048050c0..88dbf97853 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -13,10 +13,9 @@ #include "qemu/osdep.h" #include <glib/gstdio.h> #include "qemu-common.h" -#include "libqtest.h" -#include "hw/acpi/acpi-defs.h" #include "hw/smbios/smbios.h" #include "qemu/bitmap.h" +#include "acpi-utils.h" #include "boot-sector.h" #define MACHINE_PC "pc" @@ -24,18 +23,6 @@ #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" -/* DSDT and SSDTs format */ -typedef struct { - AcpiTableHeader header; - gchar *aml; /* aml bytecode from guest */ - gsize aml_len; - gchar *aml_file; - gchar *asl; /* asl code generated from aml */ - gsize asl_len; - gchar *asl_file; - bool tmp_files_retain; /* do not delete the temp asl/aml */ -} QEMU_PACKED AcpiSdtTable; - typedef struct { const char *machine; const char *variant; @@ -53,65 +40,6 @@ typedef struct { int required_struct_types_len; } test_data; -#define ACPI_READ_FIELD(field, addr) \ - do { \ - switch (sizeof(field)) { \ - case 1: \ - field = readb(addr); \ - break; \ - case 2: \ - field = readw(addr); \ - break; \ - case 4: \ - field = readl(addr); \ - break; \ - case 8: \ - field = readq(addr); \ - break; \ - default: \ - g_assert(false); \ - } \ - addr += sizeof(field); \ - } while (0); - -#define ACPI_READ_ARRAY_PTR(arr, length, addr) \ - do { \ - int idx; \ - for (idx = 0; idx < length; ++idx) { \ - ACPI_READ_FIELD(arr[idx], addr); \ - } \ - } while (0); - -#define ACPI_READ_ARRAY(arr, addr) \ - ACPI_READ_ARRAY_PTR(arr, sizeof(arr)/sizeof(arr[0]), addr) - -#define ACPI_READ_TABLE_HEADER(table, addr) \ - do { \ - ACPI_READ_FIELD((table)->signature, addr); \ - ACPI_READ_FIELD((table)->length, addr); \ - ACPI_READ_FIELD((table)->revision, addr); \ - ACPI_READ_FIELD((table)->checksum, addr); \ - ACPI_READ_ARRAY((table)->oem_id, addr); \ - ACPI_READ_ARRAY((table)->oem_table_id, addr); \ - ACPI_READ_FIELD((table)->oem_revision, addr); \ - ACPI_READ_ARRAY((table)->asl_compiler_id, addr); \ - ACPI_READ_FIELD((table)->asl_compiler_revision, addr); \ - } while (0); - -#define ACPI_ASSERT_CMP(actual, expected) do { \ - uint32_t ACPI_ASSERT_CMP_le = cpu_to_le32(actual); \ - char ACPI_ASSERT_CMP_str[5] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 4); \ - g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ -} while (0) - -#define ACPI_ASSERT_CMP64(actual, expected) do { \ - uint64_t ACPI_ASSERT_CMP_le = cpu_to_le64(actual); \ - char ACPI_ASSERT_CMP_str[9] = {}; \ - memcpy(ACPI_ASSERT_CMP_str, &ACPI_ASSERT_CMP_le, 8); \ - g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ -} while (0) - static char disk[] = "tests/acpi-test-disk-XXXXXX"; static const char *data_dir = "tests/acpi-test-data"; #ifdef CONFIG_IASL @@ -144,39 +72,12 @@ static void free_test_data(test_data *data) g_free(temp->asl_file); } - g_array_free(data->tables, false); -} - -static uint8_t acpi_checksum(const uint8_t *data, int len) -{ - int i; - uint8_t sum = 0; - - for (i = 0; i < len; i++) { - sum += data[i]; - } - - return sum; + g_array_free(data->tables, true); } static void 
test_acpi_rsdp_address(test_data *data) { - uint32_t off; - - /* OK, now find RSDP */ - for (off = 0xf0000; off < 0x100000; off += 0x10) { - uint8_t sig[] = "RSD PTR "; - int i; - - for (i = 0; i < sizeof sig - 1; ++i) { - sig[i] = readb(off + i); - } - - if (!memcmp(sig, "RSD PTR ", sizeof sig)) { - break; - } - } - + uint32_t off = acpi_find_rsdp_address(); g_assert_cmphex(off, <, 0x100000); data->rsdp_addr = off; } @@ -186,17 +87,10 @@ static void test_acpi_rsdp_table(test_data *data) AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table; uint32_t addr = data->rsdp_addr; - ACPI_READ_FIELD(rsdp_table->signature, addr); - ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); - - ACPI_READ_FIELD(rsdp_table->checksum, addr); - ACPI_READ_ARRAY(rsdp_table->oem_id, addr); - ACPI_READ_FIELD(rsdp_table->revision, addr); - ACPI_READ_FIELD(rsdp_table->rsdt_physical_address, addr); - ACPI_READ_FIELD(rsdp_table->length, addr); + acpi_parse_rsdp_table(addr, rsdp_table); /* rsdp checksum is not for the whole table, but for the first 20 bytes */ - g_assert(!acpi_checksum((uint8_t *)rsdp_table, 20)); + g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20)); } static void test_acpi_rsdt_table(test_data *data) @@ -220,8 +114,9 @@ static void test_acpi_rsdt_table(test_data *data) tables = g_new0(uint32_t, tables_nr); ACPI_READ_ARRAY_PTR(tables, tables_nr, addr); - checksum = acpi_checksum((uint8_t *)rsdt_table, rsdt_table->length) + - acpi_checksum((uint8_t *)tables, tables_nr * sizeof(uint32_t)); + checksum = acpi_calc_checksum((uint8_t *)rsdt_table, rsdt_table->length) + + acpi_calc_checksum((uint8_t *)tables, + tables_nr * sizeof(uint32_t)); g_assert(!checksum); /* SSDT tables after FADT */ @@ -279,7 +174,7 @@ static void test_acpi_fadt_table(test_data *data) ACPI_READ_FIELD(fadt_table->flags, addr); ACPI_ASSERT_CMP(fadt_table->signature, "FACP"); - g_assert(!acpi_checksum((uint8_t *)fadt_table, fadt_table->length)); + g_assert(!acpi_calc_checksum((uint8_t *)fadt_table, fadt_table->length)); } static void test_acpi_facs_table(test_data *data) @@ -308,8 +203,10 @@ static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) sdt_table->aml = g_malloc0(sdt_table->aml_len); ACPI_READ_ARRAY_PTR(sdt_table->aml, sdt_table->aml_len, addr); - checksum = acpi_checksum((uint8_t *)sdt_table, sizeof(AcpiTableHeader)) + - acpi_checksum((uint8_t *)sdt_table->aml, sdt_table->aml_len); + checksum = acpi_calc_checksum((uint8_t *)sdt_table, + sizeof(AcpiTableHeader)) + + acpi_calc_checksum((uint8_t *)sdt_table->aml, + sdt_table->aml_len); g_assert(!checksum); } @@ -608,8 +505,9 @@ static bool smbios_ep_table_ok(test_data *data) return false; } ACPI_READ_FIELD(ep_table->smbios_bcd_revision, addr); - if (acpi_checksum((uint8_t *)ep_table, sizeof *ep_table) || - acpi_checksum((uint8_t *)ep_table + 0x10, sizeof *ep_table - 0x10)) { + if (acpi_calc_checksum((uint8_t *)ep_table, sizeof *ep_table) || + acpi_calc_checksum((uint8_t *)ep_table + 0x10, + sizeof *ep_table - 0x10)) { return false; } return true; diff --git a/tests/check-qdict.c b/tests/check-qdict.c index 07b1c798d8..81162ee572 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -591,7 +591,6 @@ static void qdict_join_test(void) static void qdict_crumple_test_recursive(void) { QDict *src, *dst, *rule, *vnc, *acl, *listen; - QObject *child, *res; QList *rules; src = qdict_new(); @@ -605,40 +604,37 @@ static void qdict_crumple_test_recursive(void) qdict_put(src, "vnc.acl..name", qstring_from_str("acl0")); qdict_put(src, "vnc.acl.rule..name", 
qstring_from_str("acl0")); - res = qdict_crumple(src, &error_abort); - - g_assert_cmpint(qobject_type(res), ==, QTYPE_QDICT); - - dst = qobject_to_qdict(res); - + dst = qobject_to_qdict(qdict_crumple(src, &error_abort)); + g_assert(dst); g_assert_cmpint(qdict_size(dst), ==, 1); - child = qdict_get(dst, "vnc"); - g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); - vnc = qobject_to_qdict(child); + vnc = qdict_get_qdict(dst, "vnc"); + g_assert(vnc); + g_assert_cmpint(qdict_size(vnc), ==, 3); - child = qdict_get(vnc, "listen"); - g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); - listen = qobject_to_qdict(child); + listen = qdict_get_qdict(vnc, "listen"); + g_assert(listen); + g_assert_cmpint(qdict_size(listen), ==, 2); g_assert_cmpstr("127.0.0.1", ==, qdict_get_str(listen, "addr")); g_assert_cmpstr("5901", ==, qdict_get_str(listen, "port")); - child = qdict_get(vnc, "acl"); - g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); - acl = qobject_to_qdict(child); + acl = qdict_get_qdict(vnc, "acl"); + g_assert(acl); + g_assert_cmpint(qdict_size(acl), ==, 3); - child = qdict_get(acl, "rules"); - g_assert_cmpint(qobject_type(child), ==, QTYPE_QLIST); - rules = qobject_to_qlist(child); + rules = qdict_get_qlist(acl, "rules"); + g_assert(rules); g_assert_cmpint(qlist_size(rules), ==, 2); rule = qobject_to_qdict(qlist_pop(rules)); + g_assert(rule); g_assert_cmpint(qdict_size(rule), ==, 2); g_assert_cmpstr("fred", ==, qdict_get_str(rule, "match")); g_assert_cmpstr("allow", ==, qdict_get_str(rule, "policy")); QDECREF(rule); rule = qobject_to_qdict(qlist_pop(rules)); + g_assert(rule); g_assert_cmpint(qdict_size(rule), ==, 2); g_assert_cmpstr("bob", ==, qdict_get_str(rule, "match")); g_assert_cmpstr("deny", ==, qdict_get_str(rule, "policy")); @@ -646,9 +642,6 @@ static void qdict_crumple_test_recursive(void) /* With recursive crumpling, we should see all names unescaped */ g_assert_cmpstr("acl0", ==, qdict_get_str(vnc, "acl.name")); - child = qdict_get(vnc, "acl"); - g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); - acl = qdict_get_qdict(vnc, "acl"); g_assert_cmpstr("acl0", ==, qdict_get_str(acl, "rule.name")); QDECREF(src); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 0b21a22e10..e6d6935653 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -54,11 +54,8 @@ static void escaped_string(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - str = qobject_to_qstring(obj); + g_assert(str); g_assert_cmpstr(qstring_get_str(str), ==, test_cases[i].decoded); if (test_cases[i].skip == 0) { @@ -89,11 +86,8 @@ static void simple_string(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - str = qobject_to_qstring(obj); + g_assert(str); g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); str = qobject_to_json(obj); @@ -123,11 +117,8 @@ static void single_quote_string(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - str = qobject_to_qstring(obj); + g_assert(str); g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); QDECREF(str); @@ -820,9 +811,8 @@ static void utf8_string(void) obj = qobject_from_json(json_in); if (utf8_out) { - g_assert(obj); - g_assert(qobject_type(obj) == QTYPE_QSTRING); str = qobject_to_qstring(obj); + g_assert(str); 
g_assert_cmpstr(qstring_get_str(str), ==, utf8_out); } else { g_assert(!obj); @@ -847,9 +837,8 @@ static void utf8_string(void) */ if (0 && json_out != json_in) { obj = qobject_from_json(json_out); - g_assert(obj); - g_assert(qobject_type(obj) == QTYPE_QSTRING); str = qobject_to_qstring(obj); + g_assert(str); g_assert_cmpstr(qstring_get_str(str), ==, utf8_out); } } @@ -867,15 +856,11 @@ static void vararg_string(void) }; for (i = 0; test_cases[i].decoded; i++) { - QObject *obj; QString *str; - obj = qobject_from_jsonf("%s", test_cases[i].decoded); - - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - - str = qobject_to_qstring(obj); + str = qobject_to_qstring(qobject_from_jsonf("%s", + test_cases[i].decoded)); + g_assert(str); g_assert(strcmp(qstring_get_str(str), test_cases[i].decoded) == 0); QDECREF(str); @@ -899,19 +884,15 @@ static void simple_number(void) }; for (i = 0; test_cases[i].encoded; i++) { - QObject *obj; QInt *qint; - obj = qobject_from_json(test_cases[i].encoded); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QINT); - - qint = qobject_to_qint(obj); + qint = qobject_to_qint(qobject_from_json(test_cases[i].encoded)); + g_assert(qint); g_assert(qint_get_int(qint) == test_cases[i].decoded); if (test_cases[i].skip == 0) { QString *str; - str = qobject_to_json(obj); + str = qobject_to_json(QOBJECT(qint)); g_assert(strcmp(qstring_get_str(str), test_cases[i].encoded) == 0); QDECREF(str); } @@ -940,10 +921,8 @@ static void float_number(void) QFloat *qfloat; obj = qobject_from_json(test_cases[i].encoded); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QFLOAT); - qfloat = qobject_to_qfloat(obj); + g_assert(qfloat); g_assert(qfloat_get_double(qfloat) == test_cases[i].decoded); if (test_cases[i].skip == 0) { @@ -960,38 +939,22 @@ static void float_number(void) static void vararg_number(void) { - QObject *obj; QInt *qint; QFloat *qfloat; int value = 0x2342; long long value_ll = 0x2342342343LL; double valuef = 2.323423423; - obj = qobject_from_jsonf("%d", value); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QINT); - - qint = qobject_to_qint(obj); + qint = qobject_to_qint(qobject_from_jsonf("%d", value)); g_assert(qint_get_int(qint) == value); - QDECREF(qint); - obj = qobject_from_jsonf("%lld", value_ll); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QINT); - - qint = qobject_to_qint(obj); + qint = qobject_to_qint(qobject_from_jsonf("%lld", value_ll)); g_assert(qint_get_int(qint) == value_ll); - QDECREF(qint); - obj = qobject_from_jsonf("%f", valuef); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QFLOAT); - - qfloat = qobject_to_qfloat(obj); + qfloat = qobject_to_qfloat(qobject_from_jsonf("%f", valuef)); g_assert(qfloat_get_double(qfloat) == valuef); - QDECREF(qfloat); } @@ -1003,10 +966,8 @@ static void keyword_literal(void) QString *str; obj = qobject_from_json("true"); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QBOOL); - qbool = qobject_to_qbool(obj); + g_assert(qbool); g_assert(qbool_get_bool(qbool) == true); str = qobject_to_json(obj); @@ -1016,10 +977,8 @@ static void keyword_literal(void) QDECREF(qbool); obj = qobject_from_json("false"); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QBOOL); - qbool = qobject_to_qbool(obj); + g_assert(qbool); g_assert(qbool_get_bool(qbool) == false); str = qobject_to_json(obj); @@ -1028,23 +987,15 @@ static void keyword_literal(void) QDECREF(qbool); - obj = qobject_from_jsonf("%i", false); - g_assert(obj != 
NULL); - g_assert(qobject_type(obj) == QTYPE_QBOOL); - - qbool = qobject_to_qbool(obj); + qbool = qobject_to_qbool(qobject_from_jsonf("%i", false)); + g_assert(qbool); g_assert(qbool_get_bool(qbool) == false); - QDECREF(qbool); /* Test that non-zero values other than 1 get collapsed to true */ - obj = qobject_from_jsonf("%i", 2); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QBOOL); - - qbool = qobject_to_qbool(obj); + qbool = qobject_to_qbool(qobject_from_jsonf("%i", 2)); + g_assert(qbool); g_assert(qbool_get_bool(qbool) == true); - QDECREF(qbool); obj = qobject_from_json("null"); @@ -1110,7 +1061,7 @@ static void compare_helper(QObject *obj, void *opaque) static int compare_litqobj_to_qobj(LiteralQObject *lhs, QObject *rhs) { - if (lhs->type != qobject_type(rhs)) { + if (!rhs || lhs->type != qobject_type(rhs)) { return 0; } @@ -1184,18 +1135,12 @@ static void simple_dict(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QDICT); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); str = qobject_to_json(obj); qobject_decref(obj); obj = qobject_from_json(qstring_get_str(str)); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QDICT); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); qobject_decref(obj); QDECREF(str); @@ -1299,18 +1244,12 @@ static void simple_list(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QLIST); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); str = qobject_to_json(obj); qobject_decref(obj); obj = qobject_from_json(qstring_get_str(str)); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QLIST); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); qobject_decref(obj); QDECREF(str); @@ -1367,18 +1306,12 @@ static void simple_whitespace(void) QString *str; obj = qobject_from_json(test_cases[i].encoded); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QLIST); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); str = qobject_to_json(obj); qobject_decref(obj); obj = qobject_from_json(qstring_get_str(str)); - g_assert(obj != NULL); - g_assert(qobject_type(obj) == QTYPE_QLIST); - g_assert(compare_litqobj_to_qobj(&test_cases[i].decoded, obj) == 1); qobject_decref(obj); @@ -1403,8 +1336,6 @@ static void simple_varargs(void) g_assert(embedded_obj != NULL); obj = qobject_from_jsonf("[%d, 2, %p]", 1, embedded_obj); - g_assert(obj != NULL); - g_assert(compare_litqobj_to_qobj(&decoded, obj) == 1); qobject_decref(obj); diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 3f15d5aea8..03eda37bf4 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -50,9 +50,14 @@ docker-image-%: $(DOCKER_FILES_DIR)/%.docker $(call quiet-command,\ $(SRC_PATH)/tests/docker/docker.py build qemu:$* $< \ $(if $V,,--quiet) $(if $(NOCACHE),--no-cache) \ + $(if $(NOUSER),,--add-current-user) \ $(if $(EXECUTABLE),--include-executable=$(EXECUTABLE)),\ "BUILD","$*") +# Enforce dependancies for composite images +docker-image-debian-armhf-cross: docker-image-debian +docker-image-debian-arm64-cross: docker-image-debian + # Expand all the pre-requistes for each docker image and test combination $(foreach i,$(DOCKER_IMAGES), \ $(foreach t,$(DOCKER_TESTS) $(DOCKER_TOOLS), \ @@ -99,6 +104,7 @@ docker: @echo ' (default is 1)' @echo ' DEBUG=1 Stop and 
drop to shell in the created container' @echo ' before running the command.' + @echo ' NOUSER Define to disable adding current user to containers passwd.' @echo ' NOCACHE=1 Ignore cache when build images.' @echo ' EXECUTABLE=<path> Include executable in image.' diff --git a/tests/docker/common.rc b/tests/docker/common.rc index 21657e87c6..6865689bb5 100755 --- a/tests/docker/common.rc +++ b/tests/docker/common.rc @@ -29,7 +29,7 @@ build_qemu() config_opts="--enable-werror \ ${TARGET_LIST:+--target-list=${TARGET_LIST}} \ --prefix=$PWD/install \ - $EXTRA_CONFIGURE_OPTS \ + $QEMU_CONFIGURE_OPTS $EXTRA_CONFIGURE_OPTS \ $@" echo "Configure options:" echo $config_opts diff --git a/tests/docker/docker.py b/tests/docker/docker.py index 37d83199e7..9fd32ab5fa 100755 --- a/tests/docker/docker.py +++ b/tests/docker/docker.py @@ -25,6 +25,7 @@ import signal from tarfile import TarFile, TarInfo from StringIO import StringIO from shutil import copy, rmtree +from pwd import getpwuid DEVNULL = open(os.devnull, 'wb') @@ -149,13 +150,21 @@ class Docker(object): labels = json.loads(resp)[0]["Config"].get("Labels", {}) return labels.get("com.qemu.dockerfile-checksum", "") - def build_image(self, tag, docker_dir, dockerfile, quiet=True, argv=None): + def build_image(self, tag, docker_dir, dockerfile, + quiet=True, user=False, argv=None): if argv == None: argv = [] tmp_df = tempfile.NamedTemporaryFile(dir=docker_dir, suffix=".docker") tmp_df.write(dockerfile) + if user: + uid = os.getuid() + uname = getpwuid(uid).pw_name + tmp_df.write("\n") + tmp_df.write("RUN id %s 2>/dev/null || useradd -u %d -U %s" % + (uname, uid, uname)) + tmp_df.write("\n") tmp_df.write("LABEL com.qemu.dockerfile-checksum=%s" % _text_checksum(dockerfile)) @@ -225,6 +234,9 @@ class BuildCommand(SubCommand): help="""Specify a binary that will be copied to the container together with all its dependent libraries""") + parser.add_argument("--add-current-user", "-u", dest="user", + action="store_true", + help="Add the current user to image's passwd") parser.add_argument("tag", help="Image Tag") parser.add_argument("dockerfile", @@ -261,7 +273,7 @@ class BuildCommand(SubCommand): docker_dir) dkr.build_image(tag, docker_dir, dockerfile, - quiet=args.quiet, argv=argv) + quiet=args.quiet, user=args.user, argv=argv) rmtree(docker_dir) diff --git a/tests/docker/dockerfiles/debian-arm64-cross.docker b/tests/docker/dockerfiles/debian-arm64-cross.docker new file mode 100644 index 0000000000..592b5d7055 --- /dev/null +++ b/tests/docker/dockerfiles/debian-arm64-cross.docker @@ -0,0 +1,15 @@ +# +# Docker arm64 cross-compiler target +# +# This docker target builds on the base debian image. +# +FROM qemu:debian + +# Add the foreign architecture we want and install dependencies +RUN dpkg --add-architecture arm64 +RUN apt update +RUN apt install -yy crossbuild-essential-arm64 +RUN apt-get build-dep -yy -a arm64 qemu + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=aarch64-linux-gnu- diff --git a/tests/docker/dockerfiles/debian-armhf-cross.docker b/tests/docker/dockerfiles/debian-armhf-cross.docker new file mode 100644 index 0000000000..668d60aeb3 --- /dev/null +++ b/tests/docker/dockerfiles/debian-armhf-cross.docker @@ -0,0 +1,15 @@ +# +# Docker armhf cross-compiler target +# +# This docker target builds on the base debian image. 
+# +FROM qemu:debian + +# Add the foreign architecture we want and install dependencies +RUN dpkg --add-architecture armhf +RUN apt update +RUN apt install -yy crossbuild-essential-armhf +RUN apt-get build-dep -yy -a armhf qemu + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=arm-linux-gnueabihf- diff --git a/tests/docker/dockerfiles/debian-s390x-cross.docker b/tests/docker/dockerfiles/debian-s390x-cross.docker new file mode 100644 index 0000000000..bbb21ed088 --- /dev/null +++ b/tests/docker/dockerfiles/debian-s390x-cross.docker @@ -0,0 +1,22 @@ +# +# Docker s390 cross-compiler target +# +# This docker target is based on stretch (testing) as the stable build +# doesn't have the cross compiler available. +# +FROM debian:testing-slim + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Add the s390x architecture +RUN dpkg --add-architecture s390x + +# Grab the updated list of packages +RUN apt update +RUN apt dist-upgrade -yy +RUN apt-get build-dep -yy -a s390x qemu || apt-get -f install +RUN apt install -yy gcc-multilib-s390x-linux-gnu binutils-multiarch + +# Specify the cross prefix for this image (see tests/docker/common.rc) +ENV QEMU_CONFIGURE_OPTS --cross-prefix=s390x-linux-gnu- diff --git a/tests/docker/dockerfiles/debian.docker b/tests/docker/dockerfiles/debian.docker new file mode 100644 index 0000000000..52bd79938e --- /dev/null +++ b/tests/docker/dockerfiles/debian.docker @@ -0,0 +1,25 @@ +# +# Docker multiarch cross-compiler target +# +# This docker target is builds on Debian and Emdebian's cross compiler targets +# to build distro with a selection of cross compilers for building test binaries. +# +# On its own you can't build much but the docker-foo-cross targets +# build on top of the base debian image. 
+# +FROM debian:stable-slim + +# Setup some basic tools we need +RUN apt update +RUN apt install -yy curl aptitude + +# Setup Emdebian +RUN echo "deb http://emdebian.org/tools/debian/ jessie main" >> /etc/apt/sources.list +RUN curl http://emdebian.org/tools/debian/emdebian-toolchain-archive.key | apt-key add - + +# Duplicate deb line as deb-src +RUN cat /etc/apt/sources.list | sed "s/deb/deb-src/" >> /etc/apt/sources.list + +# Install common build utilities +RUN apt update +RUN apt install -yy build-essential clang diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index 478163b8d8..c4f80ad3d8 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -1,6 +1,6 @@ FROM fedora:latest ENV PACKAGES \ - ccache git tar PyYAML sparse flex bison \ + ccache git tar PyYAML sparse flex bison python2 \ glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \ gcc gcc-c++ clang make perl which bc findutils \ mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \ diff --git a/tests/e1000-test.c b/tests/e1000-test.c index 59cab68a60..0c5fcdcc44 100644 --- a/tests/e1000-test.c +++ b/tests/e1000-test.c @@ -44,6 +44,7 @@ int main(int argc, char **argv) path = g_strdup_printf("e1000/%s", models[i]); qtest_add_data_func(path, models[i], test_device); + g_free(path); } return g_test_run(); diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c index 8c42ca919f..c612dc64ec 100644 --- a/tests/e1000e-test.c +++ b/tests/e1000e-test.c @@ -99,7 +99,10 @@ static QPCIBus *test_bus; static void e1000e_pci_foreach_callback(QPCIDevice *dev, int devfn, void *data) { - *(QPCIDevice **) data = dev; + QPCIDevice **res = data; + + g_assert_null(*res); + *res = dev; } static QPCIDevice *e1000e_device_find(QPCIBus *bus) @@ -403,6 +406,7 @@ static void data_test_clear(e1000e_device *d) e1000e_device_clear(test_bus, d); close(test_sockets[0]); pc_alloc_uninit(test_alloc); + g_free(d->pci_dev); qpci_free_pc(test_bus); qtest_end(); } diff --git a/tests/eepro100-test.c b/tests/eepro100-test.c index ed23258b0f..bdc8a67d57 100644 --- a/tests/eepro100-test.c +++ b/tests/eepro100-test.c @@ -54,6 +54,7 @@ int main(int argc, char **argv) path = g_strdup_printf("eepro100/%s", models[i]); qtest_add_data_func(path, models[i], test_device); + g_free(path); } return g_test_run(); diff --git a/tests/endianness-test.c b/tests/endianness-test.c index cf8d41b7b4..ed0bf52019 100644 --- a/tests/endianness-test.c +++ b/tests/endianness-test.c @@ -295,14 +295,17 @@ int main(int argc, char **argv) path = g_strdup_printf("endianness/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness); + g_free(path); path = g_strdup_printf("endianness/split/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness_split); + g_free(path); path = g_strdup_printf("endianness/combine/%s", test_cases[i].machine); qtest_add_data_func(path, &test_cases[i], test_endianness_combine); + g_free(path); } return g_test_run(); diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c index 6176e81ab2..24870b38f4 100644 --- a/tests/hd-geo-test.c +++ b/tests/hd-geo-test.c @@ -19,6 +19,8 @@ #include "qemu-common.h" #include "libqtest.h" +#define ARGV_SIZE 256 + static char *create_test_img(int secs) { char *template = strdup("/tmp/qtest.XXXXXX"); @@ -66,7 +68,7 @@ static const CHST hd_chst[backend_last][mbr_last] = { }, }; -static const char *img_file_name[backend_last]; +static char *img_file_name[backend_last]; static 
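/*
 * [Editorial illustration, not part of the patch.]
 * Several hunks above add a g_free(path) right after registering a test
 * case: the g_strdup_printf() result is only needed for the duration of
 * the registration call, so keeping it around just leaks once per loop
 * iteration.  Minimal GLib sketch of the pattern (register_case is a
 * stand-in for the real registration function):
 */
#include <glib.h>
#include <stdio.h>

static void register_case(const char *path)
{
    printf("registered %s\n", path);
}

int main(void)
{
    const char *models[] = { "model-a", "model-b" };

    for (gsize i = 0; i < G_N_ELEMENTS(models); i++) {
        gchar *path = g_strdup_printf("nic/%s", models[i]);
        register_case(path);
        g_free(path);           /* previously leaked on each iteration */
    }
    return 0;
}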
const CHST *cur_ide[4]; @@ -234,28 +236,36 @@ static int setup_ide(int argc, char *argv[], int argv_sz, */ static void test_ide_none(void) { - char *argv[256]; - - setup_common(argv, ARRAY_SIZE(argv)); - qtest_start(g_strjoinv(" ", argv)); + char **argv = g_new0(char *, ARGV_SIZE); + char *args; + + setup_common(argv, ARGV_SIZE); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } static void test_ide_mbr(bool use_device, MBRcontents mbr) { - char *argv[256]; + char **argv = g_new0(char *, ARGV_SIZE); + char *args; int argc; Backend i; const char *dev; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); for (i = 0; i < backend_last; i++) { cur_ide[i] = &hd_chst[i][mbr]; dev = use_device ? (is_hd(cur_ide[i]) ? "ide-hd" : "ide-cd") : NULL; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), i, dev, i, mbr, ""); + argc = setup_ide(argc, argv, ARGV_SIZE, i, dev, i, mbr, ""); } - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -310,12 +320,13 @@ static void test_ide_device_mbr_chs(void) static void test_ide_drive_user(const char *dev, bool trans) { - char *argv[256], *opts; + char **argv = g_new0(char *, ARGV_SIZE); + char *args, *opts; int argc; int secs = img_secs[backend_small]; const CHST expected_chst = { secs / (4 * 32) , 4, 32, trans }; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); opts = g_strdup_printf("%s,%s%scyls=%d,heads=%d,secs=%d", dev ?: "", trans && dev ? "bios-chs-" : "", @@ -323,11 +334,14 @@ static void test_ide_drive_user(const char *dev, bool trans) expected_chst.cyls, expected_chst.heads, expected_chst.secs); cur_ide[0] = &expected_chst; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), + argc = setup_ide(argc, argv, ARGV_SIZE, 0, dev ? opts : NULL, backend_small, mbr_chs, dev ? 
"" : opts); g_free(opts); - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -369,18 +383,22 @@ static void test_ide_device_user_chst(void) */ static void test_ide_drive_cd_0(void) { - char *argv[256]; + char **argv = g_new0(char *, ARGV_SIZE); + char *args; int argc, ide_idx; Backend i; - argc = setup_common(argv, ARRAY_SIZE(argv)); + argc = setup_common(argv, ARGV_SIZE); for (i = 0; i <= backend_empty; i++) { ide_idx = backend_empty - i; cur_ide[ide_idx] = &hd_chst[i][mbr_blank]; - argc = setup_ide(argc, argv, ARRAY_SIZE(argv), + argc = setup_ide(argc, argv, ARGV_SIZE, ide_idx, NULL, i, mbr_blank, ""); } - qtest_start(g_strjoinv(" ", argv)); + args = g_strjoinv(" ", argv); + qtest_start(args); + g_strfreev(argv); + g_free(args); test_cmos(); qtest_end(); } @@ -418,6 +436,7 @@ int main(int argc, char **argv) for (i = 0; i < backend_last; i++) { if (img_file_name[i]) { unlink(img_file_name[i]); + free(img_file_name[i]); } } diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c index da2d5a53f0..e9d05c87d1 100644 --- a/tests/i440fx-test.c +++ b/tests/i440fx-test.c @@ -134,6 +134,8 @@ static void test_i440fx_defaults(gconstpointer opaque) /* 3.2.26 */ g_assert_cmpint(qpci_config_readb(dev, 0x93), ==, 0x00); /* TRC */ + g_free(dev); + qpci_free_pc(bus); qtest_end(); } @@ -270,6 +272,9 @@ static void test_i440fx_pam(gconstpointer opaque) /* Verify the area is not our new mask */ g_assert(!verify_area(pam_area[i].start, pam_area[i].end, 0x82)); } + + g_free(dev); + qpci_free_pc(bus); qtest_end(); } diff --git a/tests/ide-test.c b/tests/ide-test.c index fb541f88b5..139ebc0ec6 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -339,6 +339,7 @@ static void test_bmdma_simple_rw(void) g_assert(memcmp(buf, cmpbuf, len) == 0); + free_pci_device(dev); g_free(buf); g_free(cmpbuf); } @@ -369,6 +370,7 @@ static void test_bmdma_short_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_one_sector_short_prdt(void) @@ -398,6 +400,7 @@ static void test_bmdma_one_sector_short_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_long_prdt(void) @@ -426,6 +429,7 @@ static void test_bmdma_long_prdt(void) prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_no_busmaster(void) @@ -449,6 +453,7 @@ static void test_bmdma_no_busmaster(void) * in practice. At least we want to be aware of any changes. 
*/ g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); + free_pci_device(dev); } static void test_bmdma_setup(void) @@ -525,6 +530,7 @@ static void test_identify(void) assert_bit_set(buf[85], 0x20); ide_test_quit(); + free_pci_device(dev); } /* @@ -544,6 +550,7 @@ static void make_dirty(uint8_t device) guest_buf = guest_alloc(guest_malloc, len); buf = g_malloc(len); + memset(buf, rand() % 255 + 1, len); g_assert(guest_buf); g_assert(buf); @@ -562,6 +569,7 @@ static void make_dirty(uint8_t device) assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); g_free(buf); + free_pci_device(dev); } static void test_flush(void) @@ -608,6 +616,7 @@ static void test_flush(void) assert_bit_clear(data, BSY | DF | ERR | DRQ); ide_test_quit(); + free_pci_device(dev); } static void test_retry_flush(const char *machine) @@ -658,6 +667,7 @@ static void test_retry_flush(const char *machine) assert_bit_clear(data, BSY | DF | ERR | DRQ); ide_test_quit(); + free_pci_device(dev); } static void test_flush_nodev(void) @@ -675,6 +685,7 @@ static void test_flush_nodev(void) /* Just testing that qemu doesn't crash... */ + free_pci_device(dev); ide_test_quit(); } @@ -741,6 +752,7 @@ static uint8_t ide_wait_clear(uint8_t flag) while (true) { data = qpci_io_readb(dev, ide_bar, reg_status); if (!(data & flag)) { + free_pci_device(dev); return data; } if (difftime(time(NULL), st) > 5.0) { @@ -850,6 +862,7 @@ static void cdrom_pio_impl(int nblocks) g_free(pattern); g_free(rx); test_bmdma_teardown(); + free_pci_device(dev); } static void test_cdrom_pio(void) diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c index e84dd6889b..7e21a9bbcb 100644 --- a/tests/ipmi-bt-test.c +++ b/tests/ipmi-bt-test.c @@ -420,6 +420,7 @@ int main(int argc, char **argv) " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", emu_port); qtest_start(cmdline); + g_free(cmdline); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/ipmi/extern/connect", test_connect); qtest_add_func("/ipmi/extern/bt_base", test_bt_base); diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c index 9cf0b34a33..178ffc1797 100644 --- a/tests/ipmi-kcs-test.c +++ b/tests/ipmi-kcs-test.c @@ -279,6 +279,7 @@ int main(int argc, char **argv) cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-kcs,bmc=bmc0"); qtest_start(cmdline); + g_free(cmdline); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/ipmi/local/kcs_base", test_kcs_base); qtest_add_func("/ipmi/local/kcs_abort", test_kcs_abort); diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c index 72d7a961fe..0cdfaecda7 100644 --- a/tests/libqos/usb.c +++ b/tests/libqos/usb.c @@ -24,6 +24,11 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar) hc->bar = qpci_iomap(hc->dev, bar, NULL); } +void uhci_deinit(struct qhc *hc) +{ + g_free(hc->dev); +} + void uhci_port_test(struct qhc *hc, int port, uint16_t expect) { uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port); @@ -64,4 +69,5 @@ void usb_test_hotplug(const char *hcd_id, const int port, g_assert(response); g_assert(qdict_haskey(response, "event")); g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED")); + QDECREF(response); } diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h index 423dcfd82f..297cfc564d 100644 --- a/tests/libqos/usb.h +++ b/tests/libqos/usb.h @@ -11,6 +11,7 @@ struct qhc { void qusb_pci_init_one(QPCIBus *pcibus, struct qhc 
*hc, uint32_t devfn, int bar); void uhci_port_test(struct qhc *hc, int port, uint16_t expect); +void uhci_deinit(struct qhc *hc); void usb_test_hotplug(const char *bus_name, const int port, void (*port_check)(void)); diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index d4bf841f23..7ac15c04e1 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -24,9 +24,17 @@ typedef struct QVirtioPCIForeachData { void (*func)(QVirtioDevice *d, void *data); uint16_t device_type; + bool has_slot; + int slot; void *user_data; } QVirtioPCIForeachData; +void qvirtio_pci_device_free(QVirtioPCIDevice *dev) +{ + g_free(dev->pdev); + g_free(dev); +} + static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev) { QVirtioPCIDevice *vpcidev; @@ -49,16 +57,18 @@ static void qvirtio_pci_foreach_callback( QVirtioPCIForeachData *d = data; QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev); - if (vpcidev->vdev.device_type == d->device_type) { + if (vpcidev->vdev.device_type == d->device_type && + (!d->has_slot || vpcidev->pdev->devfn == d->slot << 3)) { d->func(&vpcidev->vdev, d->user_data); } else { - g_free(vpcidev); + qvirtio_pci_device_free(vpcidev); } } static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data) { QVirtioPCIDevice **vpcidev = data; + assert(!*vpcidev); *vpcidev = (QVirtioPCIDevice *)d; } @@ -284,21 +294,39 @@ const QVirtioBus qvirtio_pci = { .virtqueue_kick = qvirtio_pci_virtqueue_kick, }; -void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, +static void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, + bool has_slot, int slot, void (*func)(QVirtioDevice *d, void *data), void *data) { QVirtioPCIForeachData d = { .func = func, .device_type = device_type, + .has_slot = has_slot, + .slot = slot, .user_data = data }; qpci_device_foreach(bus, PCI_VENDOR_ID_REDHAT_QUMRANET, -1, - qvirtio_pci_foreach_callback, &d); + qvirtio_pci_foreach_callback, &d); } QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type) { QVirtioPCIDevice *dev = NULL; - qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev); + + qvirtio_pci_foreach(bus, device_type, false, 0, + qvirtio_pci_assign_device, &dev); + + dev->vdev.bus = &qvirtio_pci; + + return dev; +} + +QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus, + uint16_t device_type, int slot) +{ + QVirtioPCIDevice *dev = NULL; + + qvirtio_pci_foreach(bus, device_type, true, slot, + qvirtio_pci_assign_device, &dev); dev->vdev.bus = &qvirtio_pci; diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index 38c54c63ea..6ef19094cb 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -31,9 +31,11 @@ typedef struct QVirtQueuePCI { extern const QVirtioBus qvirtio_pci; -void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type, - void (*func)(QVirtioDevice *d, void *data), void *data); QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type); +QVirtioPCIDevice *qvirtio_pci_device_find_slot(QPCIBus *bus, + uint16_t device_type, int slot); +void qvirtio_pci_device_free(QVirtioPCIDevice *dev); + void qvirtio_pci_device_enable(QVirtioPCIDevice *d); void qvirtio_pci_device_disable(QVirtioPCIDevice *d); diff --git a/tests/libqtest.c b/tests/libqtest.c index d8fba6647a..3a0e0d63a7 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -379,9 +379,9 @@ static void qmp_response(JSONMessageParser *parser, GQueue *tokens) exit(1); } - g_assert(qobject_type(obj) == QTYPE_QDICT); g_assert(!qmp->response); - 
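/*
 * [Editorial illustration, not part of the patch.]
 * qvirtio_pci_device_find_slot() above matches devices by comparing
 * pdev->devfn against slot << 3.  A PCI devfn packs the device (slot)
 * number in the upper five bits and the function number in the lower
 * three, so slot << 3 is function 0 of that slot.  Standalone sketch:
 */
#include <stdio.h>

#define DEVFN(slot, fn)   (((slot) << 3) | ((fn) & 7))
#define DEVFN_SLOT(devfn) ((devfn) >> 3)
#define DEVFN_FN(devfn)   ((devfn) & 7)

int main(void)
{
    int devfn = DEVFN(4, 0);    /* a device in slot 4, function 0 */

    printf("devfn=0x%02x slot=%d fn=%d\n",
           devfn, DEVFN_SLOT(devfn), DEVFN_FN(devfn));
    return 0;
}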
qmp->response = (QDict *)obj; + qmp->response = qobject_to_qdict(obj); + g_assert(qmp->response); } QDict *qmp_fd_receive(int fd) @@ -805,17 +805,7 @@ void qtest_add_data_func_full(const char *str, void *data, GDestroyNotify data_free_func) { gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); -#if GLIB_CHECK_VERSION(2, 34, 0) g_test_add_data_func_full(path, data, fn, data_free_func); -#elif GLIB_CHECK_VERSION(2, 26, 0) - /* back-compat casts, remove this once we can require new-enough glib */ - g_test_add_vtable(path, 0, data, NULL, - (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func); -#else - /* back-compat casts, remove this once we can require new-enough glib */ - g_test_add_vtable(path, 0, data, NULL, - (void (*)(void)) fn, (void (*)(void)) data_free_func); -#endif g_free(path); } diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index dafe8beba4..de35a18903 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -482,7 +482,7 @@ static void test_migrate(void) usleep(10 * 1000); } while (dest_byte_a == dest_byte_b); - qmp("{ 'execute' : 'stop'}"); + qmp_discard_response("{ 'execute' : 'stop'}"); /* With it stopped, check nothing changes */ qtest_memread(to, start_address, &dest_byte_c, 1); sleep(1); diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index bd33bc353d..eac207b30e 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -76,7 +76,7 @@ static void add_tests(const char *machines[]) int main(int argc, char *argv[]) { const char *sparc_machines[] = { "SPARCbook", "Voyager", "SS-20", NULL }; - const char *sparc64_machines[] = { "sun4u", "sun4v", NULL }; + const char *sparc64_machines[] = { "sun4u", NULL }; const char *ppc_machines[] = { "mac99", "g3beige", NULL }; const char *ppc64_machines[] = { "mac99", "g3beige", "pseries", NULL }; const char *arch = qtest_get_arch(); diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c index 21d4ebb0fe..8a1b0a336c 100644 --- a/tests/ptimer-test-stubs.c +++ b/tests/ptimer-test-stubs.c @@ -108,6 +108,11 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) return bh; } +void qemu_bh_delete(QEMUBH *bh) +{ + g_free(bh); +} + void replay_bh_schedule_event(QEMUBH *bh) { bh->cb(bh->opaque); diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c index b36a476483..5d1a2a8188 100644 --- a/tests/ptimer-test.c +++ b/tests/ptimer-test.c @@ -73,6 +73,7 @@ static void check_set_count(gconstpointer arg) ptimer_set_count(ptimer, 1000); g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_set_limit(gconstpointer arg) @@ -92,6 +93,7 @@ static void check_set_limit(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000); g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_oneshot(gconstpointer arg) @@ -194,6 +196,7 @@ static void check_oneshot(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_periodic(gconstpointer arg) @@ -360,6 +363,7 @@ static void check_periodic(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, (no_round_down ? 8 : 7) + (wrap_policy ? 
1 : 0)); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_mode_change(gconstpointer arg) @@ -406,6 +410,7 @@ static void check_on_the_fly_mode_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_period_change(gconstpointer arg) @@ -438,6 +443,7 @@ static void check_on_the_fly_period_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_on_the_fly_freq_change(gconstpointer arg) @@ -470,6 +476,7 @@ static void check_on_the_fly_freq_change(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); g_assert_true(triggered); + ptimer_free(ptimer); } static void check_run_with_period_0(gconstpointer arg) @@ -487,6 +494,7 @@ static void check_run_with_period_0(gconstpointer arg) g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99); g_assert_false(triggered); + ptimer_free(ptimer); } static void check_run_with_delta_0(gconstpointer arg) @@ -591,6 +599,7 @@ static void check_run_with_delta_0(gconstpointer arg) g_assert_true(triggered); ptimer_stop(ptimer); + ptimer_free(ptimer); } static void check_periodic_with_load_0(gconstpointer arg) @@ -649,6 +658,7 @@ static void check_periodic_with_load_0(gconstpointer arg) } ptimer_stop(ptimer); + ptimer_free(ptimer); } static void check_oneshot_with_load_0(gconstpointer arg) @@ -682,14 +692,14 @@ static void check_oneshot_with_load_0(gconstpointer arg) } else { g_assert_false(triggered); } + + ptimer_free(ptimer); } static void add_ptimer_tests(uint8_t policy) { - uint8_t *ppolicy = g_malloc(1); - char *policy_name = g_malloc0(256); - - *ppolicy = policy; + char policy_name[256] = ""; + char *tmp; if (policy == PTIMER_POLICY_DEFAULT) { g_sprintf(policy_name, "default"); @@ -715,49 +725,67 @@ static void add_ptimer_tests(uint8_t policy) g_strlcat(policy_name, "no_counter_rounddown,", 256); } - g_test_add_data_func( - g_strdup_printf("/ptimer/set_count policy=%s", policy_name), - ppolicy, check_set_count); - - g_test_add_data_func( - g_strdup_printf("/ptimer/set_limit policy=%s", policy_name), - ppolicy, check_set_limit); - - g_test_add_data_func( - g_strdup_printf("/ptimer/oneshot policy=%s", policy_name), - ppolicy, check_oneshot); - - g_test_add_data_func( - g_strdup_printf("/ptimer/periodic policy=%s", policy_name), - ppolicy, check_periodic); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name), - ppolicy, check_on_the_fly_mode_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name), - ppolicy, check_on_the_fly_period_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name), - ppolicy, check_on_the_fly_freq_change); - - g_test_add_data_func( - g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name), - ppolicy, check_run_with_period_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name), - ppolicy, check_run_with_delta_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name), - ppolicy, check_periodic_with_load_0); - - g_test_add_data_func( - g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name), - ppolicy, check_oneshot_with_load_0); + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/set_count policy=%s", policy_name), + g_memdup(&policy, 
1), check_set_count, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/set_limit policy=%s", policy_name), + g_memdup(&policy, 1), check_set_limit, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/oneshot policy=%s", policy_name), + g_memdup(&policy, 1), check_oneshot, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/periodic policy=%s", policy_name), + g_memdup(&policy, 1), check_periodic, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_mode_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_period_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", + policy_name), + g_memdup(&policy, 1), check_on_the_fly_freq_change, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/run_with_period_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_run_with_period_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_run_with_delta_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_periodic_with_load_0, g_free); + g_free(tmp); + + g_test_add_data_func_full( + tmp = g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", + policy_name), + g_memdup(&policy, 1), check_oneshot_with_load_0, g_free); + g_free(tmp); } static void add_all_ptimer_policies_comb_tests(void) diff --git a/tests/pvpanic-test.c b/tests/pvpanic-test.c index 3bfa678667..71ebb5c02c 100644 --- a/tests/pvpanic-test.c +++ b/tests/pvpanic-test.c @@ -27,6 +27,7 @@ static void test_panic(void) data = qdict_get_qdict(response, "data"); g_assert(qdict_haskey(data, "action")); g_assert_cmpstr(qdict_get_str(data, "action"), ==, "pause"); + QDECREF(response); } int main(int argc, char **argv) diff --git a/tests/q35-test.c b/tests/q35-test.c index 763fe3d6ae..cc58f3ecf4 100644 --- a/tests/q35-test.c +++ b/tests/q35-test.c @@ -71,6 +71,9 @@ static void test_smram_lock(void) g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true); g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true); + + g_free(pcidev); + qpci_free_pc(pcibus); } int main(int argc, char **argv) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 54db54a1ea..0d472d5f27 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -547,11 +547,14 @@ class TestEIO(TestErrors): while not completed: for event in self.vm.get_qmp_events(wait=True): if event['event'] == 'BLOCK_JOB_ERROR': + error = True self.assert_qmp(event, 'data/device', 'drive0') self.assert_qmp(event, 'data/operation', 'read') result = self.vm.qmp('query-block-jobs') + if result == {'return': []}: + # Job finished too quickly + continue self.assert_qmp(result, 'return[0]/paused', False) - error = True elif event['event'] == 'BLOCK_JOB_COMPLETED': self.assertTrue(error, 'job completed unexpectedly') self.assert_qmp(event, 'data/type', 'stream') diff --git 
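The rewritten add_ptimer_tests drops the leaked heap copies of policy_name and ppolicy in favour of a stack buffer, g_memdup() of the single policy byte, and g_test_add_data_func_full() with g_free as the destroy notifier, freeing the generated path right after registration. The same pattern in isolation (the test path and callback below are illustrative, not taken from the patch):

    #include <glib.h>
    #include <stdint.h>

    static void check_policy(gconstpointer arg)
    {
        uint8_t policy = *(const uint8_t *)arg;
        g_assert_cmpuint(policy, <, 8);    /* placeholder check on the copied byte */
    }

    static void add_case(uint8_t policy)
    {
        char *path = g_strdup_printf("/demo/check_policy policy=%u",
                                     (unsigned)policy);

        /* GLib owns the duplicated byte and releases it with g_free after the
         * test has run; the path is copied internally, so free it here. */
        g_test_add_data_func_full(path, g_memdup(&policy, 1),
                                  check_policy, g_free);
        g_free(path);
    }

    int main(int argc, char **argv)
    {
        g_test_init(&argc, &argv, NULL);
        add_case(3);
        return g_test_run();
    }
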
a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 4673b67f37..34e66db691 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -95,14 +95,14 @@ qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024 qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1024' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k qemu-img: Image size must be less than 8 EiB! qemu-img create -f qcow2 -o size=-1k TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a non-negative number below 2^64 +qemu-img: Value '-1k' is out of range for parameter 'size' qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1kilobyte @@ -110,15 +110,19 @@ qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=1kilobyte TEST_DIR/t.qcow2 -Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 size=1024 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. +qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- foobar qemu-img: Invalid image size specified! You may use k, M, G, T, P or E suffixes for qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 -qemu-img: Parameter 'size' expects a size -You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. +qemu-img: Parameter 'size' expects a non-negative number below 2^64 +Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta- +and exabytes, respectively. 
qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 42bf4164ca..7524c62025 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -225,7 +225,7 @@ Testing: -drive driver=nbd QEMU_PROG: -drive driver=nbd: NBD server address missing Testing: -drive driver=raw -QEMU_PROG: -drive driver=raw: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive driver=raw: A block device must be specified for "file" Testing: -drive file.driver=file QEMU_PROG: -drive file.driver=file: The 'file' block driver requires a file name @@ -234,7 +234,7 @@ Testing: -drive file.driver=nbd QEMU_PROG: -drive file.driver=nbd: NBD server address missing Testing: -drive file.driver=raw -QEMU_PROG: -drive file.driver=raw: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive file.driver=raw: A block device must be specified for "file" Testing: -drive foo=bar QEMU_PROG: -drive foo=bar: Must specify either driver or file diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index f8047a2e45..c6f4eef215 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -179,7 +179,7 @@ q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: Can't use a read-only drive +(qemu) QEMU_PROG: Block node is read-only QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=scsi,readonly=on @@ -201,12 +201,12 @@ QEMU X.Y.Z monitor - type 'help' for more information Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-drive,drive=disk: Block node is read-only QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -(qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive +(qemu) QEMU_PROG: -device ide-hd,drive=disk: Block node is read-only QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. 
Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk @@ -323,7 +323,7 @@ Testing: -drive driver=nbd QEMU_PROG: -drive driver=nbd: NBD server address missing Testing: -drive driver=raw -QEMU_PROG: -drive driver=raw: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive driver=raw: A block device must be specified for "file" Testing: -drive file.driver=file QEMU_PROG: -drive file.driver=file: The 'file' block driver requires a file name @@ -332,7 +332,7 @@ Testing: -drive file.driver=nbd QEMU_PROG: -drive file.driver=nbd: NBD server address missing Testing: -drive file.driver=raw -QEMU_PROG: -drive file.driver=raw: Can't use 'raw' as a block driver for the protocol level +QEMU_PROG: -drive file.driver=raw: A block device must be specified for "file" Testing: -drive foo=bar QEMU_PROG: -drive foo=bar: Must specify either driver or file diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index 1d3fd04b65..aafcd249f6 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -48,7 +48,8 @@ class TestSingleDrive(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -164,7 +165,8 @@ class TestSetSpeed(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") self.vm.launch() def tearDown(self): @@ -247,7 +249,8 @@ class TestSingleTransaction(iotests.QMPTestCase): def setUp(self): qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) - self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.add_drive(blockdev_target_img, interface="none") if iotests.qemu_default_machine == 'pc': self.vm.add_drive(None, 'media=cdrom', 'ide') self.vm.launch() @@ -460,7 +463,7 @@ class TestDriveCompression(iotests.QMPTestCase): qemu_img('create', '-f', fmt, blockdev_target_img, str(TestDriveCompression.image_len), *args) - self.vm.add_drive(blockdev_target_img, format=fmt) + self.vm.add_drive(blockdev_target_img, format=fmt, interface="none") self.vm.launch() diff --git a/tests/qemu-iotests/085.out b/tests/qemu-iotests/085.out index 08e4bb7218..182acb42cf 100644 --- a/tests/qemu-iotests/085.out +++ b/tests/qemu-iotests/085.out @@ -74,7 +74,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ === Invalid command - snapshot node used as backing hd === -{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'virtio0'"}} +{"error": {"class": "GenericError", "desc": "Node 'snap_11' is busy: node is used as backing hd of 'snap_12'"}} === Invalid command - snapshot node has a backing image === diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137 index e5e30de2fa..eb91e517d7 100755 --- a/tests/qemu-iotests/137 +++ b/tests/qemu-iotests/137 @@ -39,7 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . 
./common.qemu _supported_fmt qcow2 -_supported_proto generic +_supported_proto file _supported_os Linux diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 index 3ba79f027a..6d8f0a1a84 100755 --- a/tests/qemu-iotests/141 +++ b/tests/qemu-iotests/141 @@ -67,7 +67,7 @@ test_blockjob() _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'x-blockdev-del', 'arguments': {'node-name': 'drv0'}}" \ - 'error' + 'error' | _filter_generated_node_ids _send_qemu_cmd $QEMU_HANDLE \ "{'execute': 'block-job-cancel', diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 195ca1a604..82e763b68d 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -20,7 +20,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} {"return": {}} @@ -30,7 +30,7 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} +{"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 6b7edaf28f..54b53293d7 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -28,6 +28,7 @@ Testing: opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -57,6 +58,7 @@ Testing: -fda TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 @@ -83,6 +85,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -93,6 +96,7 @@ Testing: -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 @@ -119,6 +123,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -129,6 +134,7 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = 
"144" @@ -158,6 +164,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -184,6 +191,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -194,6 +202,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 @@ -220,6 +229,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -230,6 +240,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -259,6 +270,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -285,6 +297,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 @@ -311,6 +324,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -321,6 +335,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -350,6 +365,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -376,6 +392,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 @@ -402,6 +419,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -412,6 +430,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" 
+ share-rw = false drive-type = "144" @@ -441,6 +460,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -451,6 +471,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -477,6 +498,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -487,6 +509,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 @@ -513,6 +536,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 @@ -539,6 +563,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa- opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" @@ -568,6 +593,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -578,6 +604,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -604,6 +631,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -614,6 +642,7 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 @@ -640,6 +669,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -650,6 +680,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -676,6 +707,7 @@ 
Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -686,6 +718,7 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device flop opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -723,6 +756,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -733,6 +767,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 @@ -759,6 +794,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -769,6 +805,7 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 @@ -802,6 +839,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -812,6 +850,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 @@ -838,6 +877,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) @@ -848,6 +888,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 @@ -874,6 +915,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -884,6 +926,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: 
-drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 @@ -910,6 +953,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) @@ -920,6 +964,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 @@ -964,6 +1009,7 @@ Testing: -device floppy opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" Testing: -device floppy,drive-type=120 @@ -990,6 +1036,7 @@ Testing: -device floppy,drive-type=120 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -device floppy,drive-type=144 @@ -1016,6 +1063,7 @@ Testing: -device floppy,drive-type=144 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -device floppy,drive-type=288 @@ -1042,6 +1090,7 @@ Testing: -device floppy,drive-type=288 opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1071,6 +1120,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "120" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 @@ -1097,6 +1147,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "288" @@ -1126,6 +1177,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 @@ -1152,6 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica opt_io_size = 0 (0x0) discard_granularity = 4294967295 (0xffffffff) write-cache = "auto" + share-rw = false drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 new file mode 100755 index 0000000000..ca56e827cd --- /dev/null +++ b/tests/qemu-iotests/175 @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Test creating raw image preallocation mode +# +# Copyright (C) 2017 Nir Soffer <nirsof@gmail.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=nirsof@gmail.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + +size=1m + +echo +echo "== creating image with default preallocation ==" +_make_test_img $size | _filter_imgfmt +stat -c "size=%s, blocks=%b" $TEST_IMG + +for mode in off full falloc; do + echo + echo "== creating image with preallocation $mode ==" + IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt + stat -c "size=%s, blocks=%b" $TEST_IMG +done + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out new file mode 100644 index 0000000000..76c02c6a57 --- /dev/null +++ b/tests/qemu-iotests/175.out @@ -0,0 +1,18 @@ +QA output created by 175 + +== creating image with default preallocation == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +size=1048576, blocks=0 + +== creating image with preallocation off == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off +size=1048576, blocks=0 + +== creating image with preallocation full == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full +size=1048576, blocks=2048 + +== creating image with preallocation falloc == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc +size=1048576, blocks=2048 + *** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index a3d904fc22..08065dceae 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -172,7 +172,7 @@ _make_test_img() # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then - eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE &" + eval "$QEMU_NBD -v -t -b 127.0.0.1 -p 10810 -f $IMGFMT $TEST_IMG_FILE >/dev/null &" sleep 1 # FIXME: qemu-nbd needs to be listening before we continue fi } @@ -379,6 +379,18 @@ _supported_proto() _notrun "not suitable for this image protocol: $IMGPROTO" } +# tests whether $IMGPROTO is specified as an unsupported image protocol for a test +# +_unsupported_proto() +{ + for f; do + if [ "$f" = "$IMGPROTO" ]; then + _notrun "not suitable for this image protocol: $IMGPROTO" + return + fi + done +} + # tests whether the host OS is one of the supported OSes for a test # _supported_os() diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 985b9a6a36..1f4bf03185 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -167,3 +167,4 @@ 172 auto 173 rw auto 174 auto +175 auto quick diff --git a/tests/tco-test.c b/tests/tco-test.c index ef02ec5903..c4c264eb3d 100644 --- a/tests/tco-test.c +++ b/tests/tco-test.c @@ -42,11 +42,18 @@ typedef struct { bool noreboot; QPCIDevice *dev; QPCIBar tco_io_bar; + QPCIBus *bus; } TestData; +static void test_end(TestData *d) +{ + g_free(d->dev); + qpci_free_pc(d->bus); + 
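Test 175 measures the effect of qemu-img's raw preallocation modes by comparing st_size with st_blocks: preallocation=off leaves a sparse file (blocks=0 in 175.out), while full and falloc reserve blocks (blocks=2048). A rough standalone illustration of that difference, assuming a Linux filesystem where posix_fallocate() reserves space (the file name is arbitrary):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "prealloc-demo.img";
        struct stat st;
        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        /* Like preallocation=off: the size grows, but no blocks are allocated. */
        if (ftruncate(fd, 1024 * 1024) != 0) {
            perror("ftruncate");
            return 1;
        }
        fstat(fd, &st);
        printf("sparse:    size=%lld blocks=%lld\n",
               (long long)st.st_size, (long long)st.st_blocks);

        /* Like preallocation=falloc: reserve blocks without writing data. */
        if (posix_fallocate(fd, 0, 1024 * 1024) != 0) {
            fprintf(stderr, "posix_fallocate failed\n");
            return 1;
        }
        fstat(fd, &st);
        printf("allocated: size=%lld blocks=%lld\n",
               (long long)st.st_size, (long long)st.st_blocks);

        close(fd);
        unlink(path);
        return 0;
    }
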
qtest_end(); +} + static void test_init(TestData *d) { - QPCIBus *bus; QTestState *qs; char *s; @@ -57,8 +64,8 @@ static void test_init(TestData *d) qtest_irq_intercept_in(qs, "ioapic"); g_free(s); - bus = qpci_init_pc(NULL); - d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00)); + d->bus = qpci_init_pc(NULL); + d->dev = qpci_device_find(d->bus, QPCI_DEVFN(0x1f, 0x00)); g_assert(d->dev != NULL); qpci_device_enable(d->dev); @@ -148,7 +155,7 @@ static void test_tco_defaults(void) SW_IRQ_GEN_DEFAULT); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==, TCO_TMR_DEFAULT); - qtest_end(); + test_end(&d); } static void test_tco_timeout(void) @@ -192,7 +199,7 @@ static void test_tco_timeout(void) g_assert(ret == 1); stop_tco(&d); - qtest_end(); + test_end(&d); } static void test_tco_max_timeout(void) @@ -225,7 +232,7 @@ static void test_tco_max_timeout(void) g_assert(ret == 1); stop_tco(&d); - qtest_end(); + test_end(&d); } static QDict *get_watchdog_action(void) @@ -262,7 +269,7 @@ static void test_tco_second_timeout_pause(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_reset(void) @@ -287,7 +294,7 @@ static void test_tco_second_timeout_reset(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_shutdown(void) @@ -312,7 +319,7 @@ static void test_tco_second_timeout_shutdown(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_second_timeout_none(void) @@ -337,7 +344,7 @@ static void test_tco_second_timeout_none(void) QDECREF(ad); stop_tco(&td); - qtest_end(); + test_end(&td); } static void test_tco_ticks_counter(void) @@ -365,7 +372,7 @@ static void test_tco_ticks_counter(void) } while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT)); stop_tco(&d); - qtest_end(); + test_end(&d); } static void test_tco1_control_bits(void) @@ -383,7 +390,7 @@ static void test_tco1_control_bits(void) qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==, TCO_LOCK); - qtest_end(); + test_end(&d); } static void test_tco1_status_bits(void) @@ -412,7 +419,7 @@ static void test_tco1_status_bits(void) g_assert(ret == 1); qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0); - qtest_end(); + test_end(&d); } static void test_tco2_status_bits(void) @@ -439,7 +446,7 @@ static void test_tco2_status_bits(void) g_assert(ret == 1); qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val); g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0); - qtest_end(); + test_end(&d); } int main(int argc, char **argv) diff --git a/tests/test-aio-multithread.c b/tests/test-aio-multithread.c index f11e990568..8b0b40ec78 100644 --- a/tests/test-aio-multithread.c +++ b/tests/test-aio-multithread.c @@ -309,7 +309,7 @@ static void mcs_mutex_lock(void) static void mcs_mutex_unlock(void) { int next; - if (nodes[id].next == -1) { + if (atomic_read(&nodes[id].next) == -1) { if (atomic_read(&mutex_head) == id && atomic_cmpxchg(&mutex_head, id, -1) == id) { /* Last item in the list, exit. */ @@ -323,7 +323,7 @@ static void mcs_mutex_unlock(void) } /* Wake up the next in line. 
*/ - next = nodes[id].next; + next = atomic_read(&nodes[id].next); nodes[next].locked = 0; qemu_futex_wake(&nodes[next].locked, 1); } diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index b132e39097..4ccbda14af 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ -96,11 +96,14 @@ static BlockJob *test_block_job_start(unsigned int iterations, char job_id[24]; data = g_new0(TestBlockJobCBData, 1); - bs = bdrv_new(); + + bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); + g_assert_nonnull(bs); + snprintf(job_id, sizeof(job_id), "job%u", counter++); - s = block_job_create(job_id, &test_block_job_driver, bs, 0, - BLOCK_JOB_DEFAULT, test_block_job_cb, - data, &error_abort); + s = block_job_create(job_id, &test_block_job_driver, bs, + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, + test_block_job_cb, data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; @@ -242,6 +245,7 @@ static void test_pair_jobs_fail_cancel_race(void) int main(int argc, char **argv) { qemu_init_main_loop(&error_abort); + bdrv_init(); g_test_init(&argc, &argv, NULL); g_test_add_func("/single/success", test_single_job_success); diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 60b78a3342..740e740398 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -30,8 +30,9 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, BlockJob *job; Error *errp = NULL; - job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); + job = block_job_create(id, &test_block_job_driver, blk_bs(blk), + 0, BLK_PERM_ALL, 0, BLOCK_JOB_DEFAULT, block_job_cb, + NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); @@ -53,10 +54,14 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, * BlockDriverState inserted. 
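The test-aio-multithread change switches both reads of nodes[id].next in mcs_mutex_unlock to atomic_read(): the successor field can be stored concurrently by a thread that is still enqueuing itself, so a plain load would be a data race. The same rule expressed with C11 atomics rather than QEMU's atomic_* macros (the variable and values are illustrative):

    #include <stdatomic.h>
    #include <pthread.h>
    #include <stdio.h>

    static atomic_int next_node = ATOMIC_VAR_INIT(-1);   /* -1: no successor yet */

    static void *enqueuer(void *arg)
    {
        (void)arg;
        atomic_store(&next_node, 7);   /* another thread publishes its node id */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        int seen;

        if (pthread_create(&t, NULL, enqueuer, NULL) != 0) {
            return 1;
        }

        do {
            /* Analogous to atomic_read(&nodes[id].next) in the unlock path:
             * the value may change underneath us, so load it atomically. */
            seen = atomic_load(&next_node);
        } while (seen == -1);

        pthread_join(t, NULL);
        printf("successor: %d\n", seen);
        return 0;
    }
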
*/ static BlockBackend *create_blk(const char *name) { - BlockBackend *blk = blk_new(); - BlockDriverState *bs = bdrv_new(); + /* No I/O is performed on this device */ + BlockBackend *blk = blk_new(0, BLK_PERM_ALL); + BlockDriverState *bs; - blk_insert_bs(blk, bs); + bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); + g_assert_nonnull(bs); + + blk_insert_bs(blk, bs, &error_abort); bdrv_unref(bs); if (name) { @@ -140,6 +145,7 @@ static void test_job_ids(void) int main(int argc, char **argv) { qemu_init_main_loop(&error_abort); + bdrv_init(); g_test_init(&argc, &argv, NULL); g_test_add_func("/blockjob/ids", test_job_ids); diff --git a/tests/test-cutils.c b/tests/test-cutils.c index 20b0f59ba2..f64a49b7fb 100644 --- a/tests/test-cutils.c +++ b/tests/test-cutils.c @@ -262,6 +262,7 @@ static void test_qemu_strtol_empty(void) err = qemu_strtol(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtol_whitespace(void) @@ -275,6 +276,7 @@ static void test_qemu_strtol_whitespace(void) err = qemu_strtol(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtol_invalid(void) @@ -288,6 +290,7 @@ static void test_qemu_strtol_invalid(void) err = qemu_strtol(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtol_trailing(void) @@ -520,7 +523,7 @@ static void test_qemu_strtoul_correct(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 12345); + g_assert_cmpuint(res, ==, 12345); g_assert(endptr == str + 5); } @@ -548,6 +551,7 @@ static void test_qemu_strtoul_empty(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtoul_whitespace(void) @@ -561,6 +565,7 @@ static void test_qemu_strtoul_whitespace(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtoul_invalid(void) @@ -574,6 +579,7 @@ static void test_qemu_strtoul_invalid(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } static void test_qemu_strtoul_trailing(void) @@ -587,7 +593,7 @@ static void test_qemu_strtoul_trailing(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + 3); } @@ -602,7 +608,7 @@ static void test_qemu_strtoul_octal(void) err = qemu_strtoul(str, &endptr, 8, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0123); + g_assert_cmpuint(res, ==, 0123); g_assert(endptr == str + strlen(str)); res = 999; @@ -610,7 +616,7 @@ static void test_qemu_strtoul_octal(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0123); + g_assert_cmpuint(res, ==, 0123); g_assert(endptr == str + strlen(str)); } @@ -625,7 +631,7 @@ static void test_qemu_strtoul_decimal(void) err = qemu_strtoul(str, &endptr, 10, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + strlen(str)); str = "123"; @@ -634,7 +640,7 @@ static void test_qemu_strtoul_decimal(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + strlen(str)); } @@ -649,7 
+655,7 @@ static void test_qemu_strtoul_hex(void) err = qemu_strtoul(str, &endptr, 16, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0x123); + g_assert_cmphex(res, ==, 0x123); g_assert(endptr == str + strlen(str)); str = "0x123"; @@ -658,7 +664,7 @@ static void test_qemu_strtoul_hex(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0x123); + g_assert_cmphex(res, ==, 0x123); g_assert(endptr == str + strlen(str)); } @@ -673,7 +679,7 @@ static void test_qemu_strtoul_max(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, ULONG_MAX); + g_assert_cmphex(res, ==, ULONG_MAX); g_assert(endptr == str + strlen(str)); g_free(str); } @@ -689,7 +695,7 @@ static void test_qemu_strtoul_overflow(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); - g_assert_cmpint(res, ==, ULONG_MAX); + g_assert_cmphex(res, ==, ULONG_MAX); g_assert(endptr == str + strlen(str)); } @@ -704,7 +710,7 @@ static void test_qemu_strtoul_underflow(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); - g_assert_cmpint(res, ==, -1ul); + g_assert_cmpuint(res, ==, -1ul); g_assert(endptr == str + strlen(str)); } @@ -719,7 +725,7 @@ static void test_qemu_strtoul_negative(void) err = qemu_strtoul(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, -321ul); + g_assert_cmpuint(res, ==, -321ul); g_assert(endptr == str + strlen(str)); } @@ -732,7 +738,7 @@ static void test_qemu_strtoul_full_correct(void) err = qemu_strtoul(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); } static void test_qemu_strtoul_full_null(void) @@ -763,7 +769,7 @@ static void test_qemu_strtoul_full_negative(void) err = qemu_strtoul(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, -321ul); + g_assert_cmpuint(res, ==, -321ul); } static void test_qemu_strtoul_full_trailing(void) @@ -786,11 +792,11 @@ static void test_qemu_strtoul_full_max(void) err = qemu_strtoul(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, ULONG_MAX); + g_assert_cmphex(res, ==, ULONG_MAX); g_free(str); } -static void test_qemu_strtoll_correct(void) +static void test_qemu_strtoi64_correct(void) { const char *str = "12345 foo"; char f = 'X'; @@ -798,27 +804,27 @@ static void test_qemu_strtoll_correct(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 12345); g_assert(endptr == str + 5); } -static void test_qemu_strtoll_null(void) +static void test_qemu_strtoi64_null(void) { char f = 'X'; const char *endptr = &f; int64_t res = 999; int err; - err = qemu_strtoll(NULL, &endptr, 0, &res); + err = qemu_strtoi64(NULL, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); g_assert(endptr == NULL); } -static void test_qemu_strtoll_empty(void) +static void test_qemu_strtoi64_empty(void) { const char *str = ""; char f = 'X'; @@ -826,12 +832,13 @@ static void test_qemu_strtoll_empty(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoll_whitespace(void) +static void test_qemu_strtoi64_whitespace(void) { const char *str = " \t "; char f = 'X'; @@ -839,12 +846,13 @@ static void 
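The new g_assert(endptr == str) checks in the qemu_strtol/qemu_strtoul tests pin down the strtol() family contract that the QEMU wrappers follow: when no conversion can be performed, the pointer stored through endptr equals the input string. A plain-libc illustration of both outcomes:

    #include <stdlib.h>
    #include <assert.h>

    int main(void)
    {
        const char *ok  = "123xxx";
        const char *bad = " \t ";
        char *end;

        long v = strtol(ok, &end, 0);
        assert(v == 123 && end == ok + 3);   /* stops at the first non-digit */

        v = strtol(bad, &end, 0);
        assert(v == 0 && end == bad);        /* nothing converted: endptr == str */
        return 0;
    }
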
test_qemu_strtoll_whitespace(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoll_invalid(void) +static void test_qemu_strtoi64_invalid(void) { const char *str = " xxxx \t abc"; char f = 'X'; @@ -852,12 +860,13 @@ static void test_qemu_strtoll_invalid(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoll_trailing(void) +static void test_qemu_strtoi64_trailing(void) { const char *str = "123xxx"; char f = 'X'; @@ -865,14 +874,14 @@ static void test_qemu_strtoll_trailing(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 123); g_assert(endptr == str + 3); } -static void test_qemu_strtoll_octal(void) +static void test_qemu_strtoi64_octal(void) { const char *str = "0123"; char f = 'X'; @@ -880,7 +889,7 @@ static void test_qemu_strtoll_octal(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 8, &res); + err = qemu_strtoi64(str, &endptr, 8, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 0123); @@ -888,14 +897,14 @@ static void test_qemu_strtoll_octal(void) endptr = &f; res = 999; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 0123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_decimal(void) +static void test_qemu_strtoi64_decimal(void) { const char *str = "0123"; char f = 'X'; @@ -903,7 +912,7 @@ static void test_qemu_strtoll_decimal(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 10, &res); + err = qemu_strtoi64(str, &endptr, 10, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 123); @@ -912,14 +921,14 @@ static void test_qemu_strtoll_decimal(void) str = "123"; endptr = &f; res = 999; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_hex(void) +static void test_qemu_strtoi64_hex(void) { const char *str = "0123"; char f = 'X'; @@ -927,7 +936,7 @@ static void test_qemu_strtoll_hex(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 16, &res); + err = qemu_strtoi64(str, &endptr, 16, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 0x123); @@ -936,14 +945,14 @@ static void test_qemu_strtoll_hex(void) str = "0x123"; endptr = &f; res = 999; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 0x123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_max(void) +static void test_qemu_strtoi64_max(void) { char *str = g_strdup_printf("%lld", LLONG_MAX); char f = 'X'; @@ -951,7 +960,7 @@ static void test_qemu_strtoll_max(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LLONG_MAX); @@ -959,7 +968,7 @@ static void test_qemu_strtoll_max(void) g_free(str); } -static void test_qemu_strtoll_overflow(void) +static void 
test_qemu_strtoi64_overflow(void) { const char *str = "99999999999999999999999999999999999999999999"; char f = 'X'; @@ -967,14 +976,14 @@ static void test_qemu_strtoll_overflow(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); g_assert_cmpint(res, ==, LLONG_MAX); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_underflow(void) +static void test_qemu_strtoi64_underflow(void) { const char *str = "-99999999999999999999999999999999999999999999"; char f = 'X'; @@ -982,14 +991,14 @@ static void test_qemu_strtoll_underflow(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); g_assert_cmpint(res, ==, LLONG_MIN); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_negative(void) +static void test_qemu_strtoi64_negative(void) { const char *str = " \t -321"; char f = 'X'; @@ -997,84 +1006,84 @@ static void test_qemu_strtoll_negative(void) int64_t res = 999; int err; - err = qemu_strtoll(str, &endptr, 0, &res); + err = qemu_strtoi64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, -321); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoll_full_correct(void) +static void test_qemu_strtoi64_full_correct(void) { const char *str = "123"; int64_t res = 999; int err; - err = qemu_strtoll(str, NULL, 0, &res); + err = qemu_strtoi64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 123); } -static void test_qemu_strtoll_full_null(void) +static void test_qemu_strtoi64_full_null(void) { int64_t res = 999; int err; - err = qemu_strtoll(NULL, NULL, 0, &res); + err = qemu_strtoi64(NULL, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoll_full_empty(void) +static void test_qemu_strtoi64_full_empty(void) { const char *str = ""; int64_t res = 999; int err; - err = qemu_strtoll(str, NULL, 0, &res); + err = qemu_strtoi64(str, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoll_full_negative(void) +static void test_qemu_strtoi64_full_negative(void) { const char *str = " \t -321"; int64_t res = 999; int err; - err = qemu_strtoll(str, NULL, 0, &res); + err = qemu_strtoi64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, -321); } -static void test_qemu_strtoll_full_trailing(void) +static void test_qemu_strtoi64_full_trailing(void) { const char *str = "123xxx"; int64_t res = 999; int err; - err = qemu_strtoll(str, NULL, 0, &res); + err = qemu_strtoi64(str, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoll_full_max(void) +static void test_qemu_strtoi64_full_max(void) { char *str = g_strdup_printf("%lld", LLONG_MAX); int64_t res; int err; - err = qemu_strtoll(str, NULL, 0, &res); + err = qemu_strtoi64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LLONG_MAX); g_free(str); } -static void test_qemu_strtoull_correct(void) +static void test_qemu_strtou64_correct(void) { const char *str = "12345 foo"; char f = 'X'; @@ -1082,27 +1091,27 @@ static void test_qemu_strtoull_correct(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 12345); + g_assert_cmpuint(res, ==, 12345); g_assert(endptr == str + 5); } -static void 
test_qemu_strtoull_null(void) +static void test_qemu_strtou64_null(void) { char f = 'X'; const char *endptr = &f; uint64_t res = 999; int err; - err = qemu_strtoull(NULL, &endptr, 0, &res); + err = qemu_strtou64(NULL, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); g_assert(endptr == NULL); } -static void test_qemu_strtoull_empty(void) +static void test_qemu_strtou64_empty(void) { const char *str = ""; char f = 'X'; @@ -1110,12 +1119,13 @@ static void test_qemu_strtoull_empty(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoull_whitespace(void) +static void test_qemu_strtou64_whitespace(void) { const char *str = " \t "; char f = 'X'; @@ -1123,12 +1133,13 @@ static void test_qemu_strtoull_whitespace(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoull_invalid(void) +static void test_qemu_strtou64_invalid(void) { const char *str = " xxxx \t abc"; char f = 'X'; @@ -1136,12 +1147,13 @@ static void test_qemu_strtoull_invalid(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); } -static void test_qemu_strtoull_trailing(void) +static void test_qemu_strtou64_trailing(void) { const char *str = "123xxx"; char f = 'X'; @@ -1149,14 +1161,14 @@ static void test_qemu_strtoull_trailing(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + 3); } -static void test_qemu_strtoull_octal(void) +static void test_qemu_strtou64_octal(void) { const char *str = "0123"; char f = 'X'; @@ -1164,22 +1176,22 @@ static void test_qemu_strtoull_octal(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 8, &res); + err = qemu_strtou64(str, &endptr, 8, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0123); + g_assert_cmpuint(res, ==, 0123); g_assert(endptr == str + strlen(str)); endptr = &f; res = 999; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0123); + g_assert_cmpuint(res, ==, 0123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_decimal(void) +static void test_qemu_strtou64_decimal(void) { const char *str = "0123"; char f = 'X'; @@ -1187,23 +1199,23 @@ static void test_qemu_strtoull_decimal(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 10, &res); + err = qemu_strtou64(str, &endptr, 10, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + strlen(str)); str = "123"; endptr = &f; res = 999; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 123); + g_assert_cmpuint(res, ==, 123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_hex(void) +static void test_qemu_strtou64_hex(void) { const char *str = "0123"; char f = 'X'; @@ -1211,23 +1223,23 @@ static void test_qemu_strtoull_hex(void) 
uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 16, &res); + err = qemu_strtou64(str, &endptr, 16, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0x123); + g_assert_cmphex(res, ==, 0x123); g_assert(endptr == str + strlen(str)); str = "0x123"; endptr = &f; res = 999; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 0x123); + g_assert_cmphex(res, ==, 0x123); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_max(void) +static void test_qemu_strtou64_max(void) { char *str = g_strdup_printf("%llu", ULLONG_MAX); char f = 'X'; @@ -1235,15 +1247,15 @@ static void test_qemu_strtoull_max(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, ULLONG_MAX); + g_assert_cmphex(res, ==, ULLONG_MAX); g_assert(endptr == str + strlen(str)); g_free(str); } -static void test_qemu_strtoull_overflow(void) +static void test_qemu_strtou64_overflow(void) { const char *str = "99999999999999999999999999999999999999999999"; char f = 'X'; @@ -1251,14 +1263,14 @@ static void test_qemu_strtoull_overflow(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); - g_assert_cmpint(res, ==, ULLONG_MAX); + g_assert_cmphex(res, ==, ULLONG_MAX); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_underflow(void) +static void test_qemu_strtou64_underflow(void) { const char *str = "-99999999999999999999999999999999999999999999"; char f = 'X'; @@ -1266,14 +1278,14 @@ static void test_qemu_strtoull_underflow(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, -ERANGE); - g_assert_cmpint(res, ==, -1); + g_assert_cmphex(res, ==, -1ull); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_negative(void) +static void test_qemu_strtou64_negative(void) { const char *str = " \t -321"; char f = 'X'; @@ -1281,94 +1293,139 @@ static void test_qemu_strtoull_negative(void) uint64_t res = 999; int err; - err = qemu_strtoull(str, &endptr, 0, &res); + err = qemu_strtou64(str, &endptr, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, -321); + g_assert_cmpuint(res, ==, -321ull); g_assert(endptr == str + strlen(str)); } -static void test_qemu_strtoull_full_correct(void) +static void test_qemu_strtou64_full_correct(void) { const char *str = "18446744073709551614"; uint64_t res = 999; int err; - err = qemu_strtoull(str, NULL, 0, &res); + err = qemu_strtou64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 18446744073709551614LLU); + g_assert_cmpuint(res, ==, 18446744073709551614ull); } -static void test_qemu_strtoull_full_null(void) +static void test_qemu_strtou64_full_null(void) { uint64_t res = 999; int err; - err = qemu_strtoull(NULL, NULL, 0, &res); + err = qemu_strtou64(NULL, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoull_full_empty(void) +static void test_qemu_strtou64_full_empty(void) { const char *str = ""; uint64_t res = 999; int err; - err = qemu_strtoull(str, NULL, 0, &res); + err = qemu_strtou64(str, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoull_full_negative(void) +static void 
test_qemu_strtou64_full_negative(void) { const char *str = " \t -321"; uint64_t res = 999; int err; - err = qemu_strtoull(str, NULL, 0, &res); + err = qemu_strtou64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, 18446744073709551295LLU); + g_assert_cmpuint(res, ==, -321ull); } -static void test_qemu_strtoull_full_trailing(void) +static void test_qemu_strtou64_full_trailing(void) { const char *str = "18446744073709551614xxxxxx"; uint64_t res = 999; int err; - err = qemu_strtoull(str, NULL, 0, &res); + err = qemu_strtou64(str, NULL, 0, &res); g_assert_cmpint(err, ==, -EINVAL); } -static void test_qemu_strtoull_full_max(void) +static void test_qemu_strtou64_full_max(void) { char *str = g_strdup_printf("%lld", ULLONG_MAX); uint64_t res = 999; int err; - err = qemu_strtoull(str, NULL, 0, &res); + err = qemu_strtou64(str, NULL, 0, &res); g_assert_cmpint(err, ==, 0); - g_assert_cmpint(res, ==, ULLONG_MAX); + g_assert_cmphex(res, ==, ULLONG_MAX); g_free(str); } static void test_qemu_strtosz_simple(void) { - const char *str = "12345M"; + const char *str; char *endptr = NULL; - int64_t res; + int err; + uint64_t res = 0xbaadf00d; - res = qemu_strtosz(str, &endptr); - g_assert_cmpint(res, ==, 12345 * M_BYTE); - g_assert(endptr == str + 6); + str = "0"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0); + g_assert(endptr == str + 1); + + str = "12345"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 12345); + g_assert(endptr == str + 5); + + err = qemu_strtosz(str, NULL, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 12345); + + /* Note: precision is 53 bits since we're parsing with strtod() */ - res = qemu_strtosz(str, NULL); - g_assert_cmpint(res, ==, 12345 * M_BYTE); + str = "9007199254740991"; /* 2^53-1 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0x1fffffffffffff); + g_assert(endptr == str + 16); + + str = "9007199254740992"; /* 2^53 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0x20000000000000); + g_assert(endptr == str + 16); + + str = "9007199254740993"; /* 2^53+1 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0x20000000000000); /* rounded to 53 bits */ + g_assert(endptr == str + 16); + + str = "18446744073709549568"; /* 0xfffffffffffff800 (53 msbs set) */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0xfffffffffffff800); + g_assert(endptr == str + 20); + + str = "18446744073709550591"; /* 0xfffffffffffffbff */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 0xfffffffffffff800); /* rounded to 53 bits */ + g_assert(endptr == str + 20); + + /* 0x7ffffffffffffe00..0x7fffffffffffffff get rounded to + * 0x8000000000000000, thus -ERANGE; see test_qemu_strtosz_erange() */ } static void test_qemu_strtosz_units(void) @@ -1381,60 +1438,157 @@ static void test_qemu_strtosz_units(void) const char *t = "1T"; const char *p = "1P"; const char *e = "1E"; - int64_t res; + int err; + char *endptr = NULL; + uint64_t res = 0xbaadf00d; /* default is M */ - res = qemu_strtosz(none, NULL); + err = qemu_strtosz_MiB(none, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, M_BYTE); + g_assert(endptr == none + 1); - res = qemu_strtosz(b, NULL); + err = qemu_strtosz(b, &endptr, 
&res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 1); + g_assert(endptr == b + 2); - res = qemu_strtosz(k, NULL); + err = qemu_strtosz(k, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, K_BYTE); + g_assert(endptr == k + 2); - res = qemu_strtosz(m, NULL); + err = qemu_strtosz(m, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, M_BYTE); + g_assert(endptr == m + 2); - res = qemu_strtosz(g, NULL); + err = qemu_strtosz(g, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, G_BYTE); + g_assert(endptr == g + 2); - res = qemu_strtosz(t, NULL); + err = qemu_strtosz(t, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, T_BYTE); + g_assert(endptr == t + 2); - res = qemu_strtosz(p, NULL); + err = qemu_strtosz(p, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, P_BYTE); + g_assert(endptr == p + 2); - res = qemu_strtosz(e, NULL); + err = qemu_strtosz(e, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, E_BYTE); + g_assert(endptr == e + 2); } static void test_qemu_strtosz_float(void) { const char *str = "12.345M"; - int64_t res; + int err; + char *endptr = NULL; + uint64_t res = 0xbaadf00d; - res = qemu_strtosz(str, NULL); + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 12.345 * M_BYTE); + g_assert(endptr == str + 7); +} + +static void test_qemu_strtosz_invalid(void) +{ + const char *str; + char *endptr = NULL; + int err; + uint64_t res = 0xbaadf00d; + + str = ""; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); + + str = " \t "; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); + + str = "crap"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -EINVAL); + g_assert(endptr == str); +} + +static void test_qemu_strtosz_trailing(void) +{ + const char *str; + char *endptr = NULL; + int err; + uint64_t res = 0xbaadf00d; + + str = "123xxx"; + err = qemu_strtosz_MiB(str, &endptr, &res); + g_assert_cmpint(res, ==, 123 * M_BYTE); + g_assert(endptr == str + 3); + + err = qemu_strtosz(str, NULL, &res); + g_assert_cmpint(err, ==, -EINVAL); + + str = "1kiB"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); + g_assert_cmpint(res, ==, 1024); + g_assert(endptr == str + 2); + + err = qemu_strtosz(str, NULL, &res); + g_assert_cmpint(err, ==, -EINVAL); } static void test_qemu_strtosz_erange(void) { - const char *str = "10E"; - int64_t res; + const char *str; + char *endptr = NULL; + int err; + uint64_t res = 0xbaadf00d; + + str = "-1"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -ERANGE); + g_assert(endptr == str + 2); + + str = "18446744073709550592"; /* 0xfffffffffffffc00 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -ERANGE); + g_assert(endptr == str + 20); + + str = "18446744073709551615"; /* 2^64-1 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -ERANGE); + g_assert(endptr == str + 20); + + str = "18446744073709551616"; /* 2^64 */ + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -ERANGE); + g_assert(endptr == str + 20); - res = qemu_strtosz(str, NULL); - g_assert_cmpint(res, ==, -ERANGE); + str = "20E"; + err = qemu_strtosz(str, &endptr, &res); + g_assert_cmpint(err, ==, -ERANGE); + g_assert(endptr == str + 3); } -static void test_qemu_strtosz_suffix_unit(void) +static void 
test_qemu_strtosz_metric(void) { - const char *str = "12345"; - int64_t res; + const char *str = "12345k"; + int err; + char *endptr = NULL; + uint64_t res = 0xbaadf00d; - res = qemu_strtosz_suffix_unit(str, NULL, - QEMU_STRTOSZ_DEFSUFFIX_KB, 1000); + err = qemu_strtosz_metric(str, &endptr, &res); + g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, 12345000); + g_assert(endptr == str + 6); } int main(int argc, char **argv) @@ -1459,21 +1613,32 @@ int main(int argc, char **argv) test_parse_uint_full_correct); /* qemu_strtol() tests */ - g_test_add_func("/cutils/qemu_strtol/correct", test_qemu_strtol_correct); - g_test_add_func("/cutils/qemu_strtol/null", test_qemu_strtol_null); - g_test_add_func("/cutils/qemu_strtol/empty", test_qemu_strtol_empty); + g_test_add_func("/cutils/qemu_strtol/correct", + test_qemu_strtol_correct); + g_test_add_func("/cutils/qemu_strtol/null", + test_qemu_strtol_null); + g_test_add_func("/cutils/qemu_strtol/empty", + test_qemu_strtol_empty); g_test_add_func("/cutils/qemu_strtol/whitespace", test_qemu_strtol_whitespace); - g_test_add_func("/cutils/qemu_strtol/invalid", test_qemu_strtol_invalid); - g_test_add_func("/cutils/qemu_strtol/trailing", test_qemu_strtol_trailing); - g_test_add_func("/cutils/qemu_strtol/octal", test_qemu_strtol_octal); - g_test_add_func("/cutils/qemu_strtol/decimal", test_qemu_strtol_decimal); - g_test_add_func("/cutils/qemu_strtol/hex", test_qemu_strtol_hex); - g_test_add_func("/cutils/qemu_strtol/max", test_qemu_strtol_max); - g_test_add_func("/cutils/qemu_strtol/overflow", test_qemu_strtol_overflow); + g_test_add_func("/cutils/qemu_strtol/invalid", + test_qemu_strtol_invalid); + g_test_add_func("/cutils/qemu_strtol/trailing", + test_qemu_strtol_trailing); + g_test_add_func("/cutils/qemu_strtol/octal", + test_qemu_strtol_octal); + g_test_add_func("/cutils/qemu_strtol/decimal", + test_qemu_strtol_decimal); + g_test_add_func("/cutils/qemu_strtol/hex", + test_qemu_strtol_hex); + g_test_add_func("/cutils/qemu_strtol/max", + test_qemu_strtol_max); + g_test_add_func("/cutils/qemu_strtol/overflow", + test_qemu_strtol_overflow); g_test_add_func("/cutils/qemu_strtol/underflow", test_qemu_strtol_underflow); - g_test_add_func("/cutils/qemu_strtol/negative", test_qemu_strtol_negative); + g_test_add_func("/cutils/qemu_strtol/negative", + test_qemu_strtol_negative); g_test_add_func("/cutils/qemu_strtol_full/correct", test_qemu_strtol_full_correct); g_test_add_func("/cutils/qemu_strtol_full/null", @@ -1488,18 +1653,26 @@ int main(int argc, char **argv) test_qemu_strtol_full_max); /* qemu_strtoul() tests */ - g_test_add_func("/cutils/qemu_strtoul/correct", test_qemu_strtoul_correct); - g_test_add_func("/cutils/qemu_strtoul/null", test_qemu_strtoul_null); - g_test_add_func("/cutils/qemu_strtoul/empty", test_qemu_strtoul_empty); + g_test_add_func("/cutils/qemu_strtoul/correct", + test_qemu_strtoul_correct); + g_test_add_func("/cutils/qemu_strtoul/null", + test_qemu_strtoul_null); + g_test_add_func("/cutils/qemu_strtoul/empty", + test_qemu_strtoul_empty); g_test_add_func("/cutils/qemu_strtoul/whitespace", test_qemu_strtoul_whitespace); - g_test_add_func("/cutils/qemu_strtoul/invalid", test_qemu_strtoul_invalid); + g_test_add_func("/cutils/qemu_strtoul/invalid", + test_qemu_strtoul_invalid); g_test_add_func("/cutils/qemu_strtoul/trailing", test_qemu_strtoul_trailing); - g_test_add_func("/cutils/qemu_strtoul/octal", test_qemu_strtoul_octal); - g_test_add_func("/cutils/qemu_strtoul/decimal", test_qemu_strtoul_decimal); - 
g_test_add_func("/cutils/qemu_strtoul/hex", test_qemu_strtoul_hex); - g_test_add_func("/cutils/qemu_strtoul/max", test_qemu_strtoul_max); + g_test_add_func("/cutils/qemu_strtoul/octal", + test_qemu_strtoul_octal); + g_test_add_func("/cutils/qemu_strtoul/decimal", + test_qemu_strtoul_decimal); + g_test_add_func("/cutils/qemu_strtoul/hex", + test_qemu_strtoul_hex); + g_test_add_func("/cutils/qemu_strtoul/max", + test_qemu_strtoul_max); g_test_add_func("/cutils/qemu_strtoul/overflow", test_qemu_strtoul_overflow); g_test_add_func("/cutils/qemu_strtoul/underflow", @@ -1519,73 +1692,86 @@ int main(int argc, char **argv) g_test_add_func("/cutils/qemu_strtoul_full/max", test_qemu_strtoul_full_max); - /* qemu_strtoll() tests */ - g_test_add_func("/cutils/qemu_strtoll/correct", test_qemu_strtoll_correct); - g_test_add_func("/cutils/qemu_strtoll/null", test_qemu_strtoll_null); - g_test_add_func("/cutils/qemu_strtoll/empty", test_qemu_strtoll_empty); - g_test_add_func("/cutils/qemu_strtoll/whitespace", - test_qemu_strtoll_whitespace); - g_test_add_func("/cutils/qemu_strtoll/invalid", test_qemu_strtoll_invalid); - g_test_add_func("/cutils/qemu_strtoll/trailing", - test_qemu_strtoll_trailing); - g_test_add_func("/cutils/qemu_strtoll/octal", test_qemu_strtoll_octal); - g_test_add_func("/cutils/qemu_strtoll/decimal", test_qemu_strtoll_decimal); - g_test_add_func("/cutils/qemu_strtoll/hex", test_qemu_strtoll_hex); - g_test_add_func("/cutils/qemu_strtoll/max", test_qemu_strtoll_max); - g_test_add_func("/cutils/qemu_strtoll/overflow", - test_qemu_strtoll_overflow); - g_test_add_func("/cutils/qemu_strtoll/underflow", - test_qemu_strtoll_underflow); - g_test_add_func("/cutils/qemu_strtoll/negative", - test_qemu_strtoll_negative); - g_test_add_func("/cutils/qemu_strtoll_full/correct", - test_qemu_strtoll_full_correct); - g_test_add_func("/cutils/qemu_strtoll_full/null", - test_qemu_strtoll_full_null); - g_test_add_func("/cutils/qemu_strtoll_full/empty", - test_qemu_strtoll_full_empty); - g_test_add_func("/cutils/qemu_strtoll_full/negative", - test_qemu_strtoll_full_negative); - g_test_add_func("/cutils/qemu_strtoll_full/trailing", - test_qemu_strtoll_full_trailing); - g_test_add_func("/cutils/qemu_strtoll_full/max", - test_qemu_strtoll_full_max); - - /* qemu_strtoull() tests */ - g_test_add_func("/cutils/qemu_strtoull/correct", - test_qemu_strtoull_correct); - g_test_add_func("/cutils/qemu_strtoull/null", - test_qemu_strtoull_null); - g_test_add_func("/cutils/qemu_strtoull/empty", test_qemu_strtoull_empty); - g_test_add_func("/cutils/qemu_strtoull/whitespace", - test_qemu_strtoull_whitespace); - g_test_add_func("/cutils/qemu_strtoull/invalid", - test_qemu_strtoull_invalid); - g_test_add_func("/cutils/qemu_strtoull/trailing", - test_qemu_strtoull_trailing); - g_test_add_func("/cutils/qemu_strtoull/octal", test_qemu_strtoull_octal); - g_test_add_func("/cutils/qemu_strtoull/decimal", - test_qemu_strtoull_decimal); - g_test_add_func("/cutils/qemu_strtoull/hex", test_qemu_strtoull_hex); - g_test_add_func("/cutils/qemu_strtoull/max", test_qemu_strtoull_max); - g_test_add_func("/cutils/qemu_strtoull/overflow", - test_qemu_strtoull_overflow); - g_test_add_func("/cutils/qemu_strtoull/underflow", - test_qemu_strtoull_underflow); - g_test_add_func("/cutils/qemu_strtoull/negative", - test_qemu_strtoull_negative); - g_test_add_func("/cutils/qemu_strtoull_full/correct", - test_qemu_strtoull_full_correct); - g_test_add_func("/cutils/qemu_strtoull_full/null", - test_qemu_strtoull_full_null); - 
g_test_add_func("/cutils/qemu_strtoull_full/empty", - test_qemu_strtoull_full_empty); - g_test_add_func("/cutils/qemu_strtoull_full/negative", - test_qemu_strtoull_full_negative); - g_test_add_func("/cutils/qemu_strtoull_full/trailing", - test_qemu_strtoull_full_trailing); - g_test_add_func("/cutils/qemu_strtoull_full/max", - test_qemu_strtoull_full_max); + /* qemu_strtoi64() tests */ + g_test_add_func("/cutils/qemu_strtoi64/correct", + test_qemu_strtoi64_correct); + g_test_add_func("/cutils/qemu_strtoi64/null", + test_qemu_strtoi64_null); + g_test_add_func("/cutils/qemu_strtoi64/empty", + test_qemu_strtoi64_empty); + g_test_add_func("/cutils/qemu_strtoi64/whitespace", + test_qemu_strtoi64_whitespace); + g_test_add_func("/cutils/qemu_strtoi64/invalid", + test_qemu_strtoi64_invalid); + g_test_add_func("/cutils/qemu_strtoi64/trailing", + test_qemu_strtoi64_trailing); + g_test_add_func("/cutils/qemu_strtoi64/octal", + test_qemu_strtoi64_octal); + g_test_add_func("/cutils/qemu_strtoi64/decimal", + test_qemu_strtoi64_decimal); + g_test_add_func("/cutils/qemu_strtoi64/hex", + test_qemu_strtoi64_hex); + g_test_add_func("/cutils/qemu_strtoi64/max", + test_qemu_strtoi64_max); + g_test_add_func("/cutils/qemu_strtoi64/overflow", + test_qemu_strtoi64_overflow); + g_test_add_func("/cutils/qemu_strtoi64/underflow", + test_qemu_strtoi64_underflow); + g_test_add_func("/cutils/qemu_strtoi64/negative", + test_qemu_strtoi64_negative); + g_test_add_func("/cutils/qemu_strtoi64_full/correct", + test_qemu_strtoi64_full_correct); + g_test_add_func("/cutils/qemu_strtoi64_full/null", + test_qemu_strtoi64_full_null); + g_test_add_func("/cutils/qemu_strtoi64_full/empty", + test_qemu_strtoi64_full_empty); + g_test_add_func("/cutils/qemu_strtoi64_full/negative", + test_qemu_strtoi64_full_negative); + g_test_add_func("/cutils/qemu_strtoi64_full/trailing", + test_qemu_strtoi64_full_trailing); + g_test_add_func("/cutils/qemu_strtoi64_full/max", + test_qemu_strtoi64_full_max); + + /* qemu_strtou64() tests */ + g_test_add_func("/cutils/qemu_strtou64/correct", + test_qemu_strtou64_correct); + g_test_add_func("/cutils/qemu_strtou64/null", + test_qemu_strtou64_null); + g_test_add_func("/cutils/qemu_strtou64/empty", + test_qemu_strtou64_empty); + g_test_add_func("/cutils/qemu_strtou64/whitespace", + test_qemu_strtou64_whitespace); + g_test_add_func("/cutils/qemu_strtou64/invalid", + test_qemu_strtou64_invalid); + g_test_add_func("/cutils/qemu_strtou64/trailing", + test_qemu_strtou64_trailing); + g_test_add_func("/cutils/qemu_strtou64/octal", + test_qemu_strtou64_octal); + g_test_add_func("/cutils/qemu_strtou64/decimal", + test_qemu_strtou64_decimal); + g_test_add_func("/cutils/qemu_strtou64/hex", + test_qemu_strtou64_hex); + g_test_add_func("/cutils/qemu_strtou64/max", + test_qemu_strtou64_max); + g_test_add_func("/cutils/qemu_strtou64/overflow", + test_qemu_strtou64_overflow); + g_test_add_func("/cutils/qemu_strtou64/underflow", + test_qemu_strtou64_underflow); + g_test_add_func("/cutils/qemu_strtou64/negative", + test_qemu_strtou64_negative); + g_test_add_func("/cutils/qemu_strtou64_full/correct", + test_qemu_strtou64_full_correct); + g_test_add_func("/cutils/qemu_strtou64_full/null", + test_qemu_strtou64_full_null); + g_test_add_func("/cutils/qemu_strtou64_full/empty", + test_qemu_strtou64_full_empty); + g_test_add_func("/cutils/qemu_strtou64_full/negative", + test_qemu_strtou64_full_negative); + g_test_add_func("/cutils/qemu_strtou64_full/trailing", + test_qemu_strtou64_full_trailing); +
g_test_add_func("/cutils/qemu_strtou64_full/max", + test_qemu_strtou64_full_max); g_test_add_func("/cutils/strtosz/simple", test_qemu_strtosz_simple); @@ -1593,10 +1779,14 @@ int main(int argc, char **argv) test_qemu_strtosz_units); g_test_add_func("/cutils/strtosz/float", test_qemu_strtosz_float); + g_test_add_func("/cutils/strtosz/invalid", + test_qemu_strtosz_invalid); + g_test_add_func("/cutils/strtosz/trailing", + test_qemu_strtosz_trailing); g_test_add_func("/cutils/strtosz/erange", test_qemu_strtosz_erange); - g_test_add_func("/cutils/strtosz/suffix-unit", - test_qemu_strtosz_suffix_unit); + g_test_add_func("/cutils/strtosz/metric", + test_qemu_strtosz_metric); return g_test_run(); } diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c index ffaaffabd0..9f84402493 100644 --- a/tests/test-filter-mirror.c +++ b/tests/test-filter-mirror.c @@ -57,7 +57,7 @@ static void test_mirror(void) }; /* send a qmp command to guarantee that 'connected' is setting to true. */ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf)); g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size)); close(send_sock[0]); diff --git a/tests/test-filter-redirector.c b/tests/test-filter-redirector.c index c63b68f03a..0c4b8d52ef 100644 --- a/tests/test-filter-redirector.c +++ b/tests/test-filter-redirector.c @@ -99,7 +99,7 @@ static void test_redirector_tx(void) g_assert_cmpint(recv_sock, !=, -1); /* send a qmp command to guarantee that 'connected' is setting to true. */ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); struct iovec iov[] = { { @@ -184,7 +184,7 @@ static void test_redirector_rx(void) send_sock = unix_connect(sock_path1, NULL); g_assert_cmpint(send_sock, !=, -1); /* send a qmp command to guarantee that 'connected' is setting to true. 
*/ - qmp("{ 'execute' : 'query-status'}"); + qmp_discard_response("{ 'execute' : 'query-status'}"); ret = iov_send(send_sock, iov, 2, 0, sizeof(size) + sizeof(send_buf)); g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size)); diff --git a/tests/test-io-channel-command.c b/tests/test-io-channel-command.c index 1d1f461bed..46ce1ff01c 100644 --- a/tests/test-io-channel-command.c +++ b/tests/test-io-channel-command.c @@ -29,8 +29,8 @@ static void test_io_channel_command_fifo(bool async) #define TEST_FIFO "tests/test-io-channel-command.fifo" QIOChannel *src, *dst; QIOChannelTest *test; - char *srcfifo = g_strdup_printf("PIPE:%s,wronly", TEST_FIFO); - char *dstfifo = g_strdup_printf("PIPE:%s,rdonly", TEST_FIFO); + const char *srcfifo = "PIPE:" TEST_FIFO ",wronly"; + const char *dstfifo = "PIPE:" TEST_FIFO ",rdonly"; const char *srcargv[] = { "/bin/socat", "-", srcfifo, NULL, }; @@ -59,8 +59,6 @@ static void test_io_channel_command_fifo(bool async) object_unref(OBJECT(src)); object_unref(OBJECT(dst)); - g_free(srcfifo); - g_free(dstfifo); unlink(TEST_FIFO); } diff --git a/tests/test-qemu-opts.c b/tests/test-qemu-opts.c index a505a3e059..c46ef31658 100644 --- a/tests/test-qemu-opts.c +++ b/tests/test-qemu-opts.c @@ -8,6 +8,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qapi/error.h" #include "qapi/qmp/qstring.h" #include "qemu/config-file.h" @@ -29,6 +30,9 @@ static QemuOptsList opts_list_01 = { },{ .name = "number1", .type = QEMU_OPT_NUMBER, + },{ + .name = "number2", + .type = QEMU_OPT_NUMBER, }, { /* end of list */ } }, @@ -42,14 +46,23 @@ static QemuOptsList opts_list_02 = { .name = "str1", .type = QEMU_OPT_STRING, },{ + .name = "str2", + .type = QEMU_OPT_STRING, + },{ .name = "bool1", .type = QEMU_OPT_BOOL, },{ - .name = "str2", - .type = QEMU_OPT_STRING, + .name = "bool2", + .type = QEMU_OPT_BOOL, },{ .name = "size1", .type = QEMU_OPT_SIZE, + },{ + .name = "size2", + .type = QEMU_OPT_SIZE, + },{ + .name = "size3", + .type = QEMU_OPT_SIZE, }, { /* end of list */ } }, @@ -57,6 +70,7 @@ static QemuOptsList opts_list_02 = { static QemuOptsList opts_list_03 = { .name = "opts_list_03", + .implied_opt_name = "implied", .head = QTAILQ_HEAD_INITIALIZER(opts_list_03.head), .desc = { /* no elements => accept any params */ @@ -421,6 +435,308 @@ static void test_qemu_opts_set(void) g_assert(opts == NULL); } +static int opts_count_iter(void *opaque, const char *name, const char *value, + Error **errp) +{ + (*(size_t *)opaque)++; + return 0; +} + +static size_t opts_count(QemuOpts *opts) +{ + size_t n = 0; + + qemu_opt_foreach(opts, opts_count_iter, &n, NULL); + return n; +} + +static void test_opts_parse(void) +{ + Error *err = NULL; + QemuOpts *opts; + char long_key[129]; + char *params; + + /* Nothing */ + opts = qemu_opts_parse(&opts_list_03, "", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 0); + + /* Empty key */ + opts = qemu_opts_parse(&opts_list_03, "=val", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, ""), ==, "val"); + + /* Long key */ + memset(long_key, 'a', 127); + long_key[127] = 'z'; + long_key[128] = 0; + params = g_strdup_printf("%s=v", long_key); + opts = qemu_opts_parse(&opts_list_03, params + 1, NULL, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, long_key + 1), ==, "v"); + + /* Overlong key gets truncated */ + opts = qemu_opts_parse(&opts_list_03, params, NULL, &error_abort); + g_assert(opts_count(opts) == 1); + long_key[127] = 0; + 
g_assert_cmpstr(qemu_opt_get(opts, long_key), ==, "v"); + g_free(params); + + /* Multiple keys, last one wins */ + opts = qemu_opts_parse(&opts_list_03, "a=1,b=2,,x,a=3", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 3); + g_assert_cmpstr(qemu_opt_get(opts, "a"), ==, "3"); + g_assert_cmpstr(qemu_opt_get(opts, "b"), ==, "2,x"); + + /* Except when it doesn't */ + opts = qemu_opts_parse(&opts_list_03, "id=foo,id=bar", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 0); + g_assert_cmpstr(qemu_opts_id(opts), ==, "foo"); + + /* TODO Cover low-level access to repeated keys */ + + /* Trailing comma is ignored */ + opts = qemu_opts_parse(&opts_list_03, "x=y,", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, "x"), ==, "y"); + + /* Except when it isn't */ + opts = qemu_opts_parse(&opts_list_03, ",", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, ""), ==, "on"); + + /* Duplicate ID */ + opts = qemu_opts_parse(&opts_list_03, "x=y,id=foo", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + /* TODO Cover .merge_lists = true */ + + /* Buggy ID recognition */ + opts = qemu_opts_parse(&opts_list_03, "x=,,id=bar", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opts_id(opts), ==, "bar"); /* BUG */ + g_assert_cmpstr(qemu_opt_get(opts, "x"), ==, ",id=bar"); + + /* Anti-social ID */ + opts = qemu_opts_parse(&opts_list_01, "id=666", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Implied value */ + opts = qemu_opts_parse(&opts_list_03, "an,noaus,noaus=", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 3); + g_assert_cmpstr(qemu_opt_get(opts, "an"), ==, "on"); + g_assert_cmpstr(qemu_opt_get(opts, "aus"), ==, "off"); + g_assert_cmpstr(qemu_opt_get(opts, "noaus"), ==, ""); + + /* Implied key */ + opts = qemu_opts_parse(&opts_list_03, "an,noaus,noaus=", true, + &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 3); + g_assert_cmpstr(qemu_opt_get(opts, "implied"), ==, "an"); + g_assert_cmpstr(qemu_opt_get(opts, "aus"), ==, "off"); + g_assert_cmpstr(qemu_opt_get(opts, "noaus"), ==, ""); + + /* Implied key with empty value */ + opts = qemu_opts_parse(&opts_list_03, ",", true, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, "implied"), ==, ""); + + /* Implied key with comma value */ + opts = qemu_opts_parse(&opts_list_03, ",,,a=1", true, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmpstr(qemu_opt_get(opts, "implied"), ==, ","); + g_assert_cmpstr(qemu_opt_get(opts, "a"), ==, "1"); + + /* Empty key is not an implied key */ + opts = qemu_opts_parse(&opts_list_03, "=val", true, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpstr(qemu_opt_get(opts, ""), ==, "val"); + + /* Unknown key */ + opts = qemu_opts_parse(&opts_list_01, "nonexistent=", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + qemu_opts_reset(&opts_list_01); + qemu_opts_reset(&opts_list_03); +} + +static void test_opts_parse_bool(void) +{ + Error *err = NULL; + QemuOpts *opts; + + opts = qemu_opts_parse(&opts_list_02, "bool1=on,bool2=off", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert(qemu_opt_get_bool(opts, "bool1", false)); + g_assert(!qemu_opt_get_bool(opts, "bool2", true)); + + opts = qemu_opts_parse(&opts_list_02, "bool1=offer", false, &err); + 
error_free_or_abort(&err); + g_assert(!opts); + + qemu_opts_reset(&opts_list_02); +} + +static void test_opts_parse_number(void) +{ + Error *err = NULL; + QemuOpts *opts; + + /* Lower limit zero */ + opts = qemu_opts_parse(&opts_list_01, "number1=0", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpuint(qemu_opt_get_number(opts, "number1", 1), ==, 0); + + /* Upper limit 2^64-1 */ + opts = qemu_opts_parse(&opts_list_01, + "number1=18446744073709551615,number2=-1", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmphex(qemu_opt_get_number(opts, "number1", 1), ==, UINT64_MAX); + g_assert_cmphex(qemu_opt_get_number(opts, "number2", 0), ==, UINT64_MAX); + + /* Above upper limit */ + opts = qemu_opts_parse(&opts_list_01, "number1=18446744073709551616", + false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Below lower limit */ + opts = qemu_opts_parse(&opts_list_01, "number1=-18446744073709551616", + false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Hex and octal */ + opts = qemu_opts_parse(&opts_list_01, "number1=0x2a,number2=052", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmpuint(qemu_opt_get_number(opts, "number1", 1), ==, 42); + g_assert_cmpuint(qemu_opt_get_number(opts, "number2", 0), ==, 42); + + /* Invalid */ + opts = qemu_opts_parse(&opts_list_01, "number1=", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + opts = qemu_opts_parse(&opts_list_01, "number1=eins", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Leading whitespace */ + opts = qemu_opts_parse(&opts_list_01, "number1= \t42", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpuint(qemu_opt_get_number(opts, "number1", 1), ==, 42); + + /* Trailing crap */ + opts = qemu_opts_parse(&opts_list_01, "number1=3.14", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + opts = qemu_opts_parse(&opts_list_01, "number1=08", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + opts = qemu_opts_parse(&opts_list_01, "number1=0 ", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + qemu_opts_reset(&opts_list_01); +} + +static void test_opts_parse_size(void) +{ + Error *err = NULL; + QemuOpts *opts; + + /* Lower limit zero */ + opts = qemu_opts_parse(&opts_list_02, "size1=0", false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 1); + g_assert_cmpuint(qemu_opt_get_size(opts, "size1", 1), ==, 0); + + /* Note: precision is 53 bits since we're parsing with strtod() */ + + /* Around limit of precision: 2^53-1, 2^53, 2^54 */ + opts = qemu_opts_parse(&opts_list_02, + "size1=9007199254740991," + "size2=9007199254740992," + "size3=9007199254740993", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 3); + g_assert_cmphex(qemu_opt_get_size(opts, "size1", 1), + ==, 0x1fffffffffffff); + g_assert_cmphex(qemu_opt_get_size(opts, "size2", 1), + ==, 0x20000000000000); + g_assert_cmphex(qemu_opt_get_size(opts, "size3", 1), + ==, 0x20000000000000); + + /* Close to signed upper limit 0x7ffffffffffffc00 (53 msbs set) */ + opts = qemu_opts_parse(&opts_list_02, + "size1=9223372036854774784," /* 7ffffffffffffc00 */ + "size2=9223372036854775295", /* 7ffffffffffffdff */ + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmphex(qemu_opt_get_size(opts, "size1", 1), + ==, 0x7ffffffffffffc00); + g_assert_cmphex(qemu_opt_get_size(opts, "size2", 1), + ==, 0x7ffffffffffffc00); + + /* Close 
to actual upper limit 0xfffffffffffff800 (53 msbs set) */ + opts = qemu_opts_parse(&opts_list_02, + "size1=18446744073709549568," /* fffffffffffff800 */ + "size2=18446744073709550591", /* fffffffffffffbff */ + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmphex(qemu_opt_get_size(opts, "size1", 1), + ==, 0xfffffffffffff800); + g_assert_cmphex(qemu_opt_get_size(opts, "size2", 1), + ==, 0xfffffffffffff800); + + /* Beyond limits */ + opts = qemu_opts_parse(&opts_list_02, "size1=-1", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + opts = qemu_opts_parse(&opts_list_02, + "size1=18446744073709550592", /* fffffffffffffc00 */ + false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Suffixes */ + opts = qemu_opts_parse(&opts_list_02, "size1=8b,size2=1.5k,size3=2M", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 3); + g_assert_cmphex(qemu_opt_get_size(opts, "size1", 0), ==, 8); + g_assert_cmphex(qemu_opt_get_size(opts, "size2", 0), ==, 1536); + g_assert_cmphex(qemu_opt_get_size(opts, "size3", 0), ==, 2 * M_BYTE); + opts = qemu_opts_parse(&opts_list_02, "size1=0.1G,size2=16777215T", + false, &error_abort); + g_assert_cmpuint(opts_count(opts), ==, 2); + g_assert_cmphex(qemu_opt_get_size(opts, "size1", 0), ==, G_BYTE / 10); + g_assert_cmphex(qemu_opt_get_size(opts, "size2", 0), + ==, 16777215 * T_BYTE); + + /* Beyond limit with suffix */ + opts = qemu_opts_parse(&opts_list_02, "size1=16777216T", + false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + /* Trailing crap */ + opts = qemu_opts_parse(&opts_list_02, "size1=16E", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + opts = qemu_opts_parse(&opts_list_02, "size1=16Gi", false, &err); + error_free_or_abort(&err); + g_assert(!opts); + + qemu_opts_reset(&opts_list_02); +} + int main(int argc, char *argv[]) { register_opts(); @@ -435,6 +751,10 @@ int main(int argc, char *argv[]) g_test_add_func("/qemu-opts/opt_unset", test_qemu_opt_unset); g_test_add_func("/qemu-opts/opts_reset", test_qemu_opts_reset); g_test_add_func("/qemu-opts/opts_set", test_qemu_opts_set); + g_test_add_func("/qemu-opts/opts_parse/general", test_opts_parse); + g_test_add_func("/qemu-opts/opts_parse/bool", test_opts_parse_bool); + g_test_add_func("/qemu-opts/opts_parse/number", test_opts_parse_number); + g_test_add_func("/qemu-opts/opts_parse/size", test_opts_parse_size); g_test_run(); return 0; } diff --git a/tests/test-qmp-event.c b/tests/test-qmp-event.c index 633dc87402..7bb621b027 100644 --- a/tests/test-qmp-event.c +++ b/tests/test-qmp-event.c @@ -95,24 +95,18 @@ static bool qdict_cmp_simple(QDict *a, QDict *b) correctness. 
*/ static void event_test_emit(test_QAPIEvent event, QDict *d, Error **errp) { - QObject *obj; QDict *t; int64_t s, ms; /* Verify that we have timestamp, then remove it to compare other fields */ - obj = qdict_get(d, "timestamp"); - g_assert(obj); - t = qobject_to_qdict(obj); + t = qdict_get_qdict(d, "timestamp"); g_assert(t); - obj = qdict_get(t, "seconds"); - g_assert(obj && qobject_type(obj) == QTYPE_QINT); - s = qint_get_int(qobject_to_qint(obj)); - obj = qdict_get(t, "microseconds"); - g_assert(obj && qobject_type(obj) == QTYPE_QINT); - ms = qint_get_int(qobject_to_qint(obj)); + s = qdict_get_try_int(t, "seconds", -2); + ms = qdict_get_try_int(t, "microseconds", -2); if (s == -1) { g_assert(ms == -1); } else { + g_assert(s >= 0); g_assert(ms >= 0 && ms <= 999999); } g_assert(qdict_size(t) == 2); diff --git a/tests/test-qobject-output-visitor.c b/tests/test-qobject-output-visitor.c index 4e2d79c5d1..500b452d98 100644 --- a/tests/test-qobject-output-visitor.c +++ b/tests/test-qobject-output-visitor.c @@ -58,81 +58,80 @@ static void test_visitor_out_int(TestOutputVisitorData *data, const void *unused) { int64_t value = -42; - QObject *obj; + QInt *qint; visit_type_int(data->ov, NULL, &value, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QINT); - g_assert_cmpint(qint_get_int(qobject_to_qint(obj)), ==, value); + qint = qobject_to_qint(visitor_get(data)); + g_assert(qint); + g_assert_cmpint(qint_get_int(qint), ==, value); } static void test_visitor_out_bool(TestOutputVisitorData *data, const void *unused) { bool value = true; - QObject *obj; + QBool *qbool; visit_type_bool(data->ov, NULL, &value, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QBOOL); - g_assert(qbool_get_bool(qobject_to_qbool(obj)) == value); + qbool = qobject_to_qbool(visitor_get(data)); + g_assert(qbool); + g_assert(qbool_get_bool(qbool) == value); } static void test_visitor_out_number(TestOutputVisitorData *data, const void *unused) { double value = 3.14; - QObject *obj; + QFloat *qfloat; visit_type_number(data->ov, NULL, &value, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QFLOAT); - g_assert(qfloat_get_double(qobject_to_qfloat(obj)) == value); + qfloat = qobject_to_qfloat(visitor_get(data)); + g_assert(qfloat); + g_assert(qfloat_get_double(qfloat) == value); } static void test_visitor_out_string(TestOutputVisitorData *data, const void *unused) { char *string = (char *) "Q E M U"; - QObject *obj; + QString *qstr; visit_type_str(data->ov, NULL, &string, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - g_assert_cmpstr(qstring_get_str(qobject_to_qstring(obj)), ==, string); + qstr = qobject_to_qstring(visitor_get(data)); + g_assert(qstr); + g_assert_cmpstr(qstring_get_str(qstr), ==, string); } static void test_visitor_out_no_string(TestOutputVisitorData *data, const void *unused) { char *string = NULL; - QObject *obj; + QString *qstr; /* A null string should return "" */ visit_type_str(data->ov, NULL, &string, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - g_assert_cmpstr(qstring_get_str(qobject_to_qstring(obj)), ==, ""); + qstr = qobject_to_qstring(visitor_get(data)); + g_assert(qstr); + g_assert_cmpstr(qstring_get_str(qstr), ==, ""); } static void test_visitor_out_enum(TestOutputVisitorData *data, const void *unused) { - QObject *obj; EnumOne i; + QString *qstr; for (i = 0; i < ENUM_ONE__MAX; i++) { visit_type_EnumOne(data->ov, 
"unused", &i, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QSTRING); - g_assert_cmpstr(qstring_get_str(qobject_to_qstring(obj)), ==, - EnumOne_lookup[i]); + qstr = qobject_to_qstring(visitor_get(data)); + g_assert(qstr); + g_assert_cmpstr(qstring_get_str(qstr), ==, EnumOne_lookup[i]); visitor_reset(data); } } @@ -160,15 +159,12 @@ static void test_visitor_out_struct(TestOutputVisitorData *data, .boolean = false, .string = (char *) "foo"}; TestStruct *p = &test_struct; - QObject *obj; QDict *qdict; visit_type_TestStruct(data->ov, NULL, &p, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QDICT); - - qdict = qobject_to_qdict(obj); + qdict = qobject_to_qdict(visitor_get(data)); + g_assert(qdict); g_assert_cmpint(qdict_size(qdict), ==, 3); g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, 42); g_assert_cmpint(qdict_get_bool(qdict, "boolean"), ==, false); @@ -180,7 +176,6 @@ static void test_visitor_out_struct_nested(TestOutputVisitorData *data, { int64_t value = 42; UserDefTwo *ud2; - QObject *obj; QDict *qdict, *dict1, *dict2, *dict3, *userdef; const char *string = "user def string"; const char *strings[] = { "forty two", "forty three", "forty four", @@ -207,10 +202,8 @@ static void test_visitor_out_struct_nested(TestOutputVisitorData *data, visit_type_UserDefTwo(data->ov, "unused", &ud2, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QDICT); - - qdict = qobject_to_qdict(obj); + qdict = qobject_to_qdict(visitor_get(data)); + g_assert(qdict); g_assert_cmpint(qdict_size(qdict), ==, 2); g_assert_cmpstr(qdict_get_str(qdict, "string0"), ==, strings[0]); @@ -267,7 +260,6 @@ static void test_visitor_out_list(TestOutputVisitorData *data, bool value_bool = true; int value_int = 10; QListEntry *entry; - QObject *obj; QList *qlist; int i; @@ -285,10 +277,8 @@ static void test_visitor_out_list(TestOutputVisitorData *data, visit_type_TestStructList(data->ov, NULL, &head, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QLIST); - - qlist = qobject_to_qlist(obj); + qlist = qobject_to_qlist(visitor_get(data)); + g_assert(qlist); g_assert(!qlist_empty(qlist)); /* ...and ensure that the visitor sees it in order */ @@ -296,8 +286,8 @@ static void test_visitor_out_list(TestOutputVisitorData *data, QLIST_FOREACH_ENTRY(qlist, entry) { QDict *qdict; - g_assert(qobject_type(entry->value) == QTYPE_QDICT); qdict = qobject_to_qdict(entry->value); + g_assert(qdict); g_assert_cmpint(qdict_size(qdict), ==, 3); g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, value_int + i); g_assert_cmpint(qdict_get_bool(qdict, "boolean"), ==, value_bool); @@ -345,13 +335,12 @@ static void test_visitor_out_any(TestOutputVisitorData *data, QBool *qbool; QString *qstring; QDict *qdict; - QObject *obj; qobj = QOBJECT(qint_from_int(-42)); visit_type_any(data->ov, NULL, &qobj, &error_abort); - obj = visitor_get(data); - g_assert(qobject_type(obj) == QTYPE_QINT); - g_assert_cmpint(qint_get_int(qobject_to_qint(obj)), ==, -42); + qint = qobject_to_qint(visitor_get(data)); + g_assert(qint); + g_assert_cmpint(qint_get_int(qint), ==, -42); qobject_decref(qobj); visitor_reset(data); @@ -362,22 +351,15 @@ static void test_visitor_out_any(TestOutputVisitorData *data, qobj = QOBJECT(qdict); visit_type_any(data->ov, NULL, &qobj, &error_abort); qobject_decref(qobj); - obj = visitor_get(data); - qdict = qobject_to_qdict(obj); + qdict = qobject_to_qdict(visitor_get(data)); g_assert(qdict); - qobj = qdict_get(qdict, 
"integer"); - g_assert(qobj); - qint = qobject_to_qint(qobj); + qint = qobject_to_qint(qdict_get(qdict, "integer")); g_assert(qint); g_assert_cmpint(qint_get_int(qint), ==, -42); - qobj = qdict_get(qdict, "boolean"); - g_assert(qobj); - qbool = qobject_to_qbool(qobj); + qbool = qobject_to_qbool(qdict_get(qdict, "boolean")); g_assert(qbool); g_assert(qbool_get_bool(qbool) == true); - qobj = qdict_get(qdict, "string"); - g_assert(qobj); - qstring = qobject_to_qstring(qobj); + qstring = qobject_to_qstring(qdict_get(qdict, "string")); g_assert(qstring); g_assert_cmpstr(qstring_get_str(qstring), ==, "foo"); } @@ -385,7 +367,6 @@ static void test_visitor_out_any(TestOutputVisitorData *data, static void test_visitor_out_union_flat(TestOutputVisitorData *data, const void *unused) { - QObject *arg; QDict *qdict; UserDefFlatUnion *tmp = g_malloc0(sizeof(UserDefFlatUnion)); @@ -395,11 +376,8 @@ static void test_visitor_out_union_flat(TestOutputVisitorData *data, tmp->u.value1.boolean = true; visit_type_UserDefFlatUnion(data->ov, NULL, &tmp, &error_abort); - arg = visitor_get(data); - - g_assert(qobject_type(arg) == QTYPE_QDICT); - qdict = qobject_to_qdict(arg); - + qdict = qobject_to_qdict(visitor_get(data)); + g_assert(qdict); g_assert_cmpstr(qdict_get_str(qdict, "enum1"), ==, "value1"); g_assert_cmpstr(qdict_get_str(qdict, "string"), ==, "str"); g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, 41); @@ -411,8 +389,9 @@ static void test_visitor_out_union_flat(TestOutputVisitorData *data, static void test_visitor_out_alternate(TestOutputVisitorData *data, const void *unused) { - QObject *arg; UserDefAlternate *tmp; + QInt *qint; + QString *qstr; QDict *qdict; tmp = g_new0(UserDefAlternate, 1); @@ -420,10 +399,9 @@ static void test_visitor_out_alternate(TestOutputVisitorData *data, tmp->u.i = 42; visit_type_UserDefAlternate(data->ov, NULL, &tmp, &error_abort); - arg = visitor_get(data); - - g_assert(qobject_type(arg) == QTYPE_QINT); - g_assert_cmpint(qint_get_int(qobject_to_qint(arg)), ==, 42); + qint = qobject_to_qint(visitor_get(data)); + g_assert(qint); + g_assert_cmpint(qint_get_int(qint), ==, 42); qapi_free_UserDefAlternate(tmp); @@ -433,10 +411,9 @@ static void test_visitor_out_alternate(TestOutputVisitorData *data, tmp->u.s = g_strdup("hello"); visit_type_UserDefAlternate(data->ov, NULL, &tmp, &error_abort); - arg = visitor_get(data); - - g_assert(qobject_type(arg) == QTYPE_QSTRING); - g_assert_cmpstr(qstring_get_str(qobject_to_qstring(arg)), ==, "hello"); + qstr = qobject_to_qstring(visitor_get(data)); + g_assert(qstr); + g_assert_cmpstr(qstring_get_str(qstr), ==, "hello"); qapi_free_UserDefAlternate(tmp); @@ -449,10 +426,8 @@ static void test_visitor_out_alternate(TestOutputVisitorData *data, tmp->u.udfu.u.value1.boolean = true; visit_type_UserDefAlternate(data->ov, NULL, &tmp, &error_abort); - arg = visitor_get(data); - - g_assert_cmpint(qobject_type(arg), ==, QTYPE_QDICT); - qdict = qobject_to_qdict(arg); + qdict = qobject_to_qdict(visitor_get(data)); + g_assert(qdict); g_assert_cmpint(qdict_size(qdict), ==, 4); g_assert_cmpint(qdict_get_int(qdict, "integer"), ==, 1); g_assert_cmpstr(qdict_get_str(qdict, "string"), ==, "str"); @@ -465,7 +440,6 @@ static void test_visitor_out_alternate(TestOutputVisitorData *data, static void test_visitor_out_null(TestOutputVisitorData *data, const void *unused) { - QObject *arg; QDict *qdict; QObject *nil; @@ -473,9 +447,8 @@ static void test_visitor_out_null(TestOutputVisitorData *data, visit_type_null(data->ov, "a", &error_abort); 
visit_check_struct(data->ov, &error_abort); visit_end_struct(data->ov, NULL); - arg = visitor_get(data); - g_assert(qobject_type(arg) == QTYPE_QDICT); - qdict = qobject_to_qdict(arg); + qdict = qobject_to_qdict(visitor_get(data)); + g_assert(qdict); g_assert_cmpint(qdict_size(qdict), ==, 1); nil = qdict_get(qdict, "a"); g_assert(nil); @@ -618,8 +591,6 @@ static void check_native_list(QObject *qobj, QList *qlist; int i; - g_assert(qobj); - g_assert(qobject_type(qobj) == QTYPE_QDICT); qdict = qobject_to_qdict(qobj); g_assert(qdict); g_assert(qdict_haskey(qdict, "data")); diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 363b59a38f..bd7c501b2e 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -593,9 +593,10 @@ static void test_groups(void) BlockBackend *blk1, *blk2, *blk3; BlockBackendPublic *blkp1, *blkp2, *blkp3; - blk1 = blk_new(); - blk2 = blk_new(); - blk3 = blk_new(); + /* No actual I/O is performed on these devices */ + blk1 = blk_new(0, BLK_PERM_ALL); + blk2 = blk_new(0, BLK_PERM_ALL); + blk3 = blk_new(0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 39f338a4c4..f694a89782 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -476,6 +476,8 @@ const VMStateDescription vmsd_tst = { } }; +/* test array migration */ + #define AR_SIZE 4 typedef struct { @@ -492,20 +494,22 @@ const VMStateDescription vmsd_arps = { VMSTATE_END_OF_LIST() } }; + +static uint8_t wire_arr_ptr_no0[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + static void test_arr_ptr_str_no0_save(void) { TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; save_vmstate(&vmsd_arps, &sample); - compare_vmstate(wire_sample, sizeof(wire_sample)); + compare_vmstate(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); } static void test_arr_ptr_str_no0_load(void) @@ -514,21 +518,98 @@ static void test_arr_ptr_str_no0_load(void) TestStructTriv ar[AR_SIZE] = {}; TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; int idx; - uint8_t wire_sample[] = { - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x01, - 0x00, 0x00, 0x00, 0x02, - 0x00, 0x00, 0x00, 0x03, - QEMU_VM_EOF - }; - save_buffer(wire_sample, sizeof(wire_sample)); + save_buffer(wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0)); + SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, + wire_arr_ptr_no0, sizeof(wire_arr_ptr_no0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); + } +} + +static uint8_t wire_arr_ptr_0[] = { + 0x00, 0x00, 0x00, 0x00, + VMS_NULLPTR_MARKER, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF +}; + +static void test_arr_ptr_str_0_save(void) +{ + TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; + TestArrayOfPtrToStuct sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arps, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_str_0_load(void) +{ + TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 0}, {.i = 2}, {.i = 3} }; + TestStructTriv ar[AR_SIZE] = {}; + TestArrayOfPtrToStuct obj = {.ar = 
{&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, - wire_sample, sizeof(wire_sample))); + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); for (idx = 0; idx < AR_SIZE; ++idx) { /* compare the target array ar with the ground truth array ar_gt */ g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); } + for (idx = 0; idx < AR_SIZE; ++idx) { + if (idx == 1) { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), ==, 0); + } else { + g_assert_cmpint((uintptr_t)(obj.ar[idx]), !=, 0); + } + } +} + +typedef struct TestArrayOfPtrToInt { + int32_t *ar[AR_SIZE]; +} TestArrayOfPtrToInt; + +const VMStateDescription vmsd_arpp = { + .name = "test/arps", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_ARRAY_OF_POINTER(ar, TestArrayOfPtrToInt, + AR_SIZE, 0, vmstate_info_int32, int32_t*), + VMSTATE_END_OF_LIST() + } +}; + +static void test_arr_ptr_prim_0_save(void) +{ + int32_t ar[AR_SIZE] = {0 , 1, 2, 3}; + TestArrayOfPtrToInt sample = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + + save_vmstate(&vmsd_arpp, &sample); + compare_vmstate(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); +} + +static void test_arr_ptr_prim_0_load(void) +{ + int32_t ar_gt[AR_SIZE] = {0, 1, 2, 3}; + int32_t ar[AR_SIZE] = {3 , 42, 1, 0}; + TestArrayOfPtrToInt obj = {.ar = {&ar[0], NULL, &ar[2], &ar[3]} }; + int idx; + + save_buffer(wire_arr_ptr_0, sizeof(wire_arr_ptr_0)); + SUCCESS(load_vmstate_one(&vmsd_arpp, &obj, 1, + wire_arr_ptr_0, sizeof(wire_arr_ptr_0))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + if (idx == 1) { + g_assert_cmpint(42, ==, ar[idx]); + } else { + g_assert_cmpint(ar_gt[idx], ==, ar[idx]); + } + } } /* test QTAILQ migration */ @@ -781,6 +862,13 @@ int main(int argc, char **argv) test_arr_ptr_str_no0_save); g_test_add_func("/vmstate/array/ptr/str/no0/load", test_arr_ptr_str_no0_load); + g_test_add_func("/vmstate/array/ptr/str/0/save", test_arr_ptr_str_0_save); + g_test_add_func("/vmstate/array/ptr/str/0/load", + test_arr_ptr_str_0_load); + g_test_add_func("/vmstate/array/ptr/prim/0/save", + test_arr_ptr_prim_0_save); + g_test_add_func("/vmstate/array/ptr/prim/0/load", + test_arr_ptr_prim_0_load); g_test_add_func("/vmstate/qtailq/save/saveq", test_save_q); g_test_add_func("/vmstate/qtailq/load/loadq", test_load_q); g_test_add_func("/vmstate/tmp_struct", test_tmp_struct); diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c index 57af8a034e..944eb1c088 100644 --- a/tests/usb-hcd-ehci-test.c +++ b/tests/usb-hcd-ehci-test.c @@ -50,11 +50,8 @@ static void ehci_port_test(struct qhc *hc, int port, uint32_t expect) /* tests */ -static void pci_init(void) +static void test_init(void) { - if (pcibus) { - return; - } pcibus = qpci_init_pc(NULL); g_assert(pcibus != NULL); @@ -64,6 +61,15 @@ static void pci_init(void) qusb_pci_init_one(pcibus, &ehci1, QPCI_DEVFN(0x1d, 7), 0); } +static void test_deinit(void) +{ + uhci_deinit(&uhci1); + uhci_deinit(&uhci2); + uhci_deinit(&uhci3); + uhci_deinit(&ehci1); + qpci_free_pc(pcibus); +} + static void pci_uhci_port_1(void) { g_assert(pcibus != NULL); @@ -142,7 +148,7 @@ int main(int argc, char **argv) int ret; g_test_init(&argc, &argv, NULL); - qtest_add_func("/ehci/pci/init", pci_init); + qtest_add_func("/ehci/pci/uhci-port-1", pci_uhci_port_1); qtest_add_func("/ehci/pci/ehci-port-1", pci_ehci_port_1); qtest_add_func("/ehci/pci/ehci-config", pci_ehci_config); @@ -161,7 +167,10 @@ int main(int argc, 
char **argv) "-drive if=none,id=usbcdrom,media=cdrom " "-device usb-tablet,bus=ich9-ehci-1.0,port=1,usb_version=1 " "-device usb-storage,bus=ich9-ehci-1.0,port=2,drive=usbcdrom "); + + test_init(); ret = g_test_run(); + test_deinit(); qtest_end(); diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index e956b9ccb7..f25bae5e6c 100644 --- a/tests/usb-hcd-uhci-test.c +++ b/tests/usb-hcd-uhci-test.c @@ -28,6 +28,7 @@ static void test_port(int port) g_assert(port > 0); qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4); uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS); + uhci_deinit(&uhci); } static void test_port_1(void) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 2c45c7b29f..a61896c32d 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -139,6 +139,7 @@ enum { }; typedef struct TestServer { + QPCIBus *bus; gchar *socket_path; gchar *mig_path; gchar *chr_name; @@ -160,14 +161,13 @@ static const char *root; static void init_virtio_dev(TestServer *s) { - QPCIBus *bus; QVirtioPCIDevice *dev; uint32_t features; - bus = qpci_init_pc(NULL); - g_assert_nonnull(bus); + s->bus = qpci_init_pc(NULL); + g_assert_nonnull(s->bus); - dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET); + dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET); g_assert_nonnull(dev); qvirtio_pci_device_enable(dev); @@ -180,6 +180,7 @@ static void init_virtio_dev(TestServer *s) qvirtio_set_features(&dev->vdev, features); qvirtio_set_driver_ok(&dev->vdev); + qvirtio_pci_device_free(dev); } static void wait_for_fds(TestServer *s) @@ -507,6 +508,8 @@ static gboolean _test_server_free(TestServer *server) g_free(server->mig_path); g_free(server->chr_name); + qpci_free_pc(server->bus); + g_free(server); return FALSE; diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c index 9556291567..43a1ad813f 100644 --- a/tests/virtio-9p-test.c +++ b/tests/virtio-9p-test.c @@ -80,7 +80,7 @@ static void qvirtio_9p_pci_stop(QVirtIO9P *v9p) { qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc); qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev)); - g_free(v9p->dev); + qvirtio_pci_device_free((QVirtioPCIDevice *)v9p->dev); qvirtio_9p_stop(v9p); } diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 0e32e416dd..1eee95df49 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -108,7 +108,7 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) { QVirtioPCIDevice *dev; - dev = qvirtio_pci_device_find(bus, VIRTIO_ID_BLOCK); + dev = qvirtio_pci_device_find_slot(bus, VIRTIO_ID_BLOCK, slot); g_assert(dev != NULL); g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK); g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN)); @@ -296,7 +296,7 @@ static void pci_basic(void) /* End test */ qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -389,7 +389,7 @@ static void pci_indirect(void) /* End test */ qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -409,15 +409,16 @@ static void pci_config(void) qvirtio_set_driver_ok(&dev->vdev); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); 
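The pci_config and pci_msix hunks here resize drive0 with block_resize and then, just below, check that the capacity read back from virtio config space equals n_size / 512. The division reflects the virtio-blk convention of reporting capacity in 512-byte sectors, independent of the logical block size. A small self-contained sketch of that expectation (plain C; the helper name is invented for illustration and is not a QEMU or libqtest function):

    #include <assert.h>
    #include <inttypes.h>
    #include <stdio.h>

    /* virtio-blk advertises capacity in 512-byte sectors, so resizing the
     * backing drive to n bytes should be observed as n / 512 sectors in the
     * first config-space field. */
    static uint64_t expected_virtio_blk_capacity(uint64_t resize_bytes)
    {
        return resize_bytes / 512;
    }

    int main(void)
    {
        /* e.g. a 10 MiB resize should read back as 20480 sectors */
        uint64_t sectors = expected_virtio_blk_capacity(10 * 1024 * 1024);
        assert(sectors == 20480);
        printf("%" PRIu64 "\n", sectors);
        return 0;
    }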
qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, n_size / 512); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -458,8 +459,9 @@ static void pci_msix(void) qvirtio_set_driver_ok(&dev->vdev); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); @@ -524,7 +526,7 @@ static void pci_msix(void) qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -640,7 +642,7 @@ static void pci_idx(void) qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); qtest_shutdown(qs); } @@ -659,7 +661,7 @@ static void pci_hotplug(void) dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP); g_assert(dev); qvirtio_pci_device_disable(dev); - g_free(dev); + qvirtio_pci_device_free(dev); /* unplug secondary disk */ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { @@ -691,8 +693,9 @@ static void mmio_basic(void) test_basic(&dev->vdev, alloc, vq); - qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " - " 'size': %d } }", n_size); + qmp_discard_response("{ 'execute': 'block_resize', " + " 'arguments': { 'device': 'drive0', " + " 'size': %d } }", n_size); qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US); diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 69220ef07b..0eabd56fd9 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -63,7 +63,7 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs) qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc); } qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev)); - g_free(vs->dev); + qvirtio_pci_device_free((QVirtioPCIDevice *)vs->dev); qvirtio_scsi_stop(vs->qs); g_free(vs); } diff --git a/translate-all.c b/translate-all.c index 1a21e3fb1f..d42d003e67 100644 --- a/translate-all.c +++ b/translate-all.c @@ -55,11 +55,11 @@ #include "translate-all.h" #include "qemu/bitmap.h" #include "qemu/timer.h" +#include "qemu/main-loop.h" #include "exec/log.h" /* #define DEBUG_TB_INVALIDATE */ /* #define DEBUG_TB_FLUSH */ -/* #define DEBUG_LOCKING */ /* make various TB consistency checks */ /* #define DEBUG_TB_CHECK */ @@ -74,20 +74,10 @@ * access to the memory related structures are protected with the * mmap_lock. 
*/ -#ifdef DEBUG_LOCKING -#define DEBUG_MEM_LOCKS 1 -#else -#define DEBUG_MEM_LOCKS 0 -#endif - #ifdef CONFIG_SOFTMMU -#define assert_memory_lock() do { /* nothing */ } while (0) +#define assert_memory_lock() tcg_debug_assert(have_tb_lock) #else -#define assert_memory_lock() do { \ - if (DEBUG_MEM_LOCKS) { \ - g_assert(have_mmap_lock()); \ - } \ - } while (0) +#define assert_memory_lock() tcg_debug_assert(have_mmap_lock()) #endif #define SMC_BITMAP_USE_THRESHOLD 10 @@ -145,9 +135,7 @@ TCGContext tcg_ctx; bool parallel_cpus; /* translation block context */ -#ifdef CONFIG_USER_ONLY __thread int have_tb_lock; -#endif static void page_table_config_init(void) { @@ -169,51 +157,31 @@ static void page_table_config_init(void) assert(v_l2_levels >= 0); } +#define assert_tb_locked() tcg_debug_assert(have_tb_lock) +#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock) + void tb_lock(void) { -#ifdef CONFIG_USER_ONLY - assert(!have_tb_lock); + assert_tb_unlocked(); qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); have_tb_lock++; -#endif } void tb_unlock(void) { -#ifdef CONFIG_USER_ONLY - assert(have_tb_lock); + assert_tb_locked(); have_tb_lock--; qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); -#endif } void tb_lock_reset(void) { -#ifdef CONFIG_USER_ONLY if (have_tb_lock) { qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); have_tb_lock = 0; } -#endif } -#ifdef DEBUG_LOCKING -#define DEBUG_TB_LOCKS 1 -#else -#define DEBUG_TB_LOCKS 0 -#endif - -#ifdef CONFIG_SOFTMMU -#define assert_tb_lock() do { /* nothing */ } while (0) -#else -#define assert_tb_lock() do { \ - if (DEBUG_TB_LOCKS) { \ - g_assert(have_tb_lock); \ - } \ - } while (0) -#endif - - static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); void cpu_gen_init(void) @@ -847,7 +815,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) { TranslationBlock *tb; - assert_tb_lock(); + assert_tb_locked(); if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) { return NULL; @@ -862,7 +830,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) /* Called with tb_lock held. */ void tb_free(TranslationBlock *tb) { - assert_tb_lock(); + assert_tb_locked(); /* In practice this is mostly used for single use temporary TB Ignore the hard cases and just back up if this TB happens to @@ -1104,7 +1072,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) uint32_t h; tb_page_addr_t phys_pc; - assert_tb_lock(); + assert_tb_locked(); atomic_set(&tb->invalid, true); @@ -1421,7 +1389,7 @@ static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end) #ifdef CONFIG_SOFTMMU void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) { - assert_tb_lock(); + assert_tb_locked(); tb_invalidate_phys_range_1(start, end); } #else @@ -1464,7 +1432,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #endif /* TARGET_HAS_PRECISE_SMC */ assert_memory_lock(); - assert_tb_lock(); + assert_tb_locked(); p = page_find(start >> TARGET_PAGE_BITS); if (!p) { @@ -1543,7 +1511,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, #ifdef CONFIG_SOFTMMU /* len must be <= 8 and start must be a multiple of len. * Called via softmmu_template.h when code areas are written to with - * tb_lock held. + * iothread mutex not held. 
*/ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) { @@ -1745,7 +1713,10 @@ void tb_check_watchpoint(CPUState *cpu) #ifndef CONFIG_USER_ONLY /* in deterministic execution mode, instructions doing device I/Os - must be at the end of the TB */ + * must be at the end of the TB. + * + * Called by softmmu_template.h, with iothread mutex not held. + */ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) { #if defined(TARGET_MIPS) || defined(TARGET_SH4) @@ -1957,6 +1928,7 @@ void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf) void cpu_interrupt(CPUState *cpu, int mask) { + g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; cpu->icount_decr.u16.high = -1; } diff --git a/translate-common.c b/translate-common.c index 77762fd86c..40fe5a19bb 100644 --- a/translate-common.c +++ b/translate-common.c @@ -21,6 +21,7 @@ #include "qemu-common.h" #include "qom/cpu.h" #include "sysemu/cpus.h" +#include "qemu/main-loop.h" uintptr_t qemu_real_host_page_size; intptr_t qemu_real_host_page_mask; @@ -30,6 +31,7 @@ intptr_t qemu_real_host_page_mask; static void tcg_handle_interrupt(CPUState *cpu, int mask) { int old_mask; + g_assert(qemu_mutex_iothread_locked()); old_mask = cpu->interrupt_request; cpu->interrupt_request |= mask; @@ -40,14 +42,13 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) */ if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); - return; - } - - cpu->icount_decr.u16.high = -1; - if (use_icount && - !cpu->can_do_io - && (mask & ~old_mask) != 0) { - cpu_abort(cpu, "Raised interrupt while not in I/O function"); + } else { + cpu->icount_decr.u16.high = -1; + if (use_icount && + !cpu->can_do_io + && (mask & ~old_mask) != 0) { + cpu_abort(cpu, "Raised interrupt while not in I/O function"); + } } } diff --git a/ui/console.c b/ui/console.c index 49d0740b40..d1ff7504ec 100644 --- a/ui/console.c +++ b/ui/console.c @@ -1735,16 +1735,30 @@ QEMUGLContext dpy_gl_ctx_get_current(QemuConsole *con) return con->gl->ops->dpy_gl_ctx_get_current(con->gl); } -void dpy_gl_scanout(QemuConsole *con, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, uint32_t width, uint32_t height) +void dpy_gl_scanout_disable(QemuConsole *con) { assert(con->gl); - con->gl->ops->dpy_gl_scanout(con->gl, backing_id, - backing_y_0_top, - backing_width, backing_height, - x, y, width, height); + if (con->gl->ops->dpy_gl_scanout_disable) { + con->gl->ops->dpy_gl_scanout_disable(con->gl); + } else { + con->gl->ops->dpy_gl_scanout_texture(con->gl, 0, false, 0, 0, + 0, 0, 0, 0); + } +} + +void dpy_gl_scanout_texture(QemuConsole *con, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t width, uint32_t height) +{ + assert(con->gl); + con->gl->ops->dpy_gl_scanout_texture(con->gl, backing_id, + backing_y_0_top, + backing_width, backing_height, + x, y, width, height); } void dpy_gl_update(QemuConsole *con, diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index 3f5d328c7b..d53288f027 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -170,11 +170,21 @@ QEMUGLContext gd_egl_create_context(DisplayChangeListener *dcl, return qemu_egl_create_context(dcl, params); } -void gd_egl_scanout(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h) +void gd_egl_scanout_disable(DisplayChangeListener *dcl) +{ + VirtualConsole *vc = 
container_of(dcl, VirtualConsole, gfx.dcl); + + vc->gfx.w = 0; + vc->gfx.h = 0; + vc->gfx.tex_id = 0; + gtk_egl_set_scanout_mode(vc, false); +} + +void gd_egl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, bool backing_y_0_top, + uint32_t backing_width, uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) { VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl); @@ -188,11 +198,6 @@ void gd_egl_scanout(DisplayChangeListener *dcl, eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, vc->gfx.esurface, vc->gfx.ectx); - if (vc->gfx.tex_id == 0 || vc->gfx.w == 0 || vc->gfx.h == 0) { - gtk_egl_set_scanout_mode(vc, false); - return; - } - gtk_egl_set_scanout_mode(vc, true); if (!vc->gfx.fbo_id) { glGenFramebuffers(1, &vc->gfx.fbo_id); diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 0df5a36a9f..b05c665cbb 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -167,11 +167,13 @@ void gd_gl_area_destroy_context(DisplayChangeListener *dcl, QEMUGLContext ctx) /* FIXME */ } -void gd_gl_area_scanout(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h) +void gd_gl_area_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) { VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl); @@ -669,7 +669,7 @@ static const DisplayChangeListenerOps dcl_gl_area_ops = { .dpy_gl_ctx_destroy = gd_gl_area_destroy_context, .dpy_gl_ctx_make_current = gd_gl_area_make_current, .dpy_gl_ctx_get_current = gd_gl_area_get_current_context, - .dpy_gl_scanout = gd_gl_area_scanout, + .dpy_gl_scanout_texture = gd_gl_area_scanout_texture, .dpy_gl_update = gd_gl_area_scanout_flush, }; @@ -688,7 +688,8 @@ static const DisplayChangeListenerOps dcl_egl_ops = { .dpy_gl_ctx_destroy = qemu_egl_destroy_context, .dpy_gl_ctx_make_current = gd_egl_make_current, .dpy_gl_ctx_get_current = qemu_egl_get_current_context, - .dpy_gl_scanout = gd_egl_scanout, + .dpy_gl_scanout_disable = gd_egl_scanout_disable, + .dpy_gl_scanout_texture = gd_egl_scanout_texture, .dpy_gl_update = gd_egl_scanout_flush, }; @@ -2200,11 +2201,12 @@ static void gd_set_keycode_type(GtkDisplayState *s) GdkDisplay *display = gtk_widget_get_display(s->window); if (GDK_IS_X11_DISPLAY(display)) { Display *x11_display = gdk_x11_display_get_xdisplay(display); - XkbDescPtr desc = XkbGetKeyboard(x11_display, XkbGBN_AllComponentsMask, - XkbUseCoreKbd); + XkbDescPtr desc = XkbGetMap(x11_display, XkbGBN_AllComponentsMask, + XkbUseCoreKbd); char *keycodes = NULL; - if (desc && desc->names) { + if (desc && + (XkbGetNames(x11_display, XkbKeycodesNameMask, desc) == Success)) { keycodes = XGetAtomName(x11_display, desc->names->keycodes); } if (keycodes == NULL) { @@ -233,10 +233,12 @@ static int check_for_evdev(void) if (!SDL_GetWMInfo(&info)) { return 0; } - desc = XkbGetKeyboard(info.info.x11.display, - XkbGBN_AllComponentsMask, - XkbUseCoreKbd); - if (desc && desc->names) { + desc = XkbGetMap(info.info.x11.display, + XkbGBN_AllComponentsMask, + XkbUseCoreKbd); + if (desc && + (XkbGetNames(info.info.x11.display, + XkbKeycodesNameMask, desc) == Success)) { keycodes = XGetAtomName(info.info.x11.display, desc->names->keycodes); if (keycodes == NULL) { fprintf(stderr, "could not lookup keycode name\n"); diff --git a/ui/sdl2-gl.c b/ui/sdl2-gl.c index 039645df3e..1cd77e2c16 
100644 --- a/ui/sdl2-gl.c +++ b/ui/sdl2-gl.c @@ -184,11 +184,24 @@ QEMUGLContext sdl2_gl_get_current_context(DisplayChangeListener *dcl) return (QEMUGLContext)sdlctx; } -void sdl2_gl_scanout(DisplayChangeListener *dcl, - uint32_t backing_id, bool backing_y_0_top, - uint32_t backing_width, uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h) +void sdl2_gl_scanout_disable(DisplayChangeListener *dcl) +{ + struct sdl2_console *scon = container_of(dcl, struct sdl2_console, dcl); + + assert(scon->opengl); + scon->w = 0; + scon->h = 0; + scon->tex_id = 0; + sdl2_set_scanout_mode(scon, false); +} + +void sdl2_gl_scanout_texture(DisplayChangeListener *dcl, + uint32_t backing_id, + bool backing_y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) { struct sdl2_console *scon = container_of(dcl, struct sdl2_console, dcl); @@ -202,11 +215,6 @@ void sdl2_gl_scanout(DisplayChangeListener *dcl, SDL_GL_MakeCurrent(scon->real_window, scon->winctx); - if (scon->tex_id == 0 || scon->w == 0 || scon->h == 0) { - sdl2_set_scanout_mode(scon, false); - return; - } - sdl2_set_scanout_mode(scon, true); if (!scon->fbo_id) { glGenFramebuffers(1, &scon->fbo_id); @@ -733,7 +733,8 @@ static const DisplayChangeListenerOps dcl_gl_ops = { .dpy_gl_ctx_destroy = sdl2_gl_destroy_context, .dpy_gl_ctx_make_current = sdl2_gl_make_context_current, .dpy_gl_ctx_get_current = sdl2_gl_get_current_context, - .dpy_gl_scanout = sdl2_gl_scanout, + .dpy_gl_scanout_disable = sdl2_gl_scanout_disable, + .dpy_gl_scanout_texture = sdl2_gl_scanout_texture, .dpy_gl_update = sdl2_gl_scanout_flush, }; #endif diff --git a/ui/spice-core.c b/ui/spice-core.c index 39ccab7561..804abc5c0f 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -497,6 +497,12 @@ static QemuOptsList qemu_spice_opts = { },{ .name = "seamless-migration", .type = QEMU_OPT_BOOL, + },{ + .name = "display", + .type = QEMU_OPT_STRING, + },{ + .name = "head", + .type = QEMU_OPT_NUMBER, #ifdef HAVE_SPICE_GL },{ .name = "gl", diff --git a/ui/spice-display.c b/ui/spice-display.c index 64e472eeb0..b353445f58 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -928,39 +928,44 @@ static QEMUGLContext qemu_spice_gl_create_context(DisplayChangeListener *dcl, return qemu_egl_create_context(dcl, params); } -static void qemu_spice_gl_scanout(DisplayChangeListener *dcl, - uint32_t tex_id, - bool y_0_top, - uint32_t backing_width, - uint32_t backing_height, - uint32_t x, uint32_t y, - uint32_t w, uint32_t h) +static void qemu_spice_gl_scanout_disable(DisplayChangeListener *dcl) +{ + SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); + + dprint(1, "%s: no framebuffer\n", __func__); + spice_qxl_gl_scanout(&ssd->qxl, -1, 0, 0, 0, 0, false); + qemu_spice_gl_monitor_config(ssd, 0, 0, 0, 0); + ssd->have_surface = false; + ssd->have_scanout = false; +} + +static void qemu_spice_gl_scanout_texture(DisplayChangeListener *dcl, + uint32_t tex_id, + bool y_0_top, + uint32_t backing_width, + uint32_t backing_height, + uint32_t x, uint32_t y, + uint32_t w, uint32_t h) { SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); EGLint stride = 0, fourcc = 0; int fd = -1; - if (tex_id) { - fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc); - if (fd < 0) { - fprintf(stderr, "%s: failed to get fd for texture\n", __func__); - return; - } - dprint(1, "%s: %dx%d (stride %d, fourcc 0x%x)\n", __func__, - w, h, stride, fourcc); - } else { - dprint(1, "%s: no texture (no framebuffer)\n", 
__func__); + assert(tex_id); + fd = egl_get_fd_for_texture(tex_id, &stride, &fourcc); + if (fd < 0) { + fprintf(stderr, "%s: failed to get fd for texture\n", __func__); + return; } - - assert(!tex_id || fd >= 0); + dprint(1, "%s: %dx%d (stride %d, fourcc 0x%x)\n", __func__, + w, h, stride, fourcc); /* note: spice server will close the fd */ spice_qxl_gl_scanout(&ssd->qxl, fd, backing_width, backing_height, stride, fourcc, y_0_top); - ssd->have_surface = false; - ssd->have_scanout = (tex_id != 0); - qemu_spice_gl_monitor_config(ssd, x, y, w, h); + ssd->have_surface = false; + ssd->have_scanout = true; } static void qemu_spice_gl_update(DisplayChangeListener *dcl, @@ -993,7 +998,8 @@ static const DisplayChangeListenerOps display_listener_gl_ops = { .dpy_gl_ctx_make_current = qemu_egl_make_context_current, .dpy_gl_ctx_get_current = qemu_egl_get_current_context, - .dpy_gl_scanout = qemu_spice_gl_scanout, + .dpy_gl_scanout_disable = qemu_spice_gl_scanout_disable, + .dpy_gl_scanout_texture = qemu_spice_gl_scanout_texture, .dpy_gl_update = qemu_spice_gl_update, }; @@ -1029,9 +1035,26 @@ static void qemu_spice_display_init_one(QemuConsole *con) void qemu_spice_display_init(void) { - QemuConsole *con; + QemuOptsList *olist = qemu_find_opts("spice"); + QemuOpts *opts = QTAILQ_FIRST(&olist->head); + QemuConsole *spice_con, *con; + const char *str; int i; + str = qemu_opt_get(opts, "display"); + if (str) { + int head = qemu_opt_get_number(opts, "head", 0); + Error *err = NULL; + + spice_con = qemu_console_lookup_by_device_name(str, head, &err); + if (err) { + error_report("Failed to lookup display/head"); + exit(1); + } + } else { + spice_con = NULL; + } + for (i = 0;; i++) { con = qemu_console_lookup_by_index(i); if (!con || !qemu_console_is_graphic(con)) { @@ -1040,6 +1063,9 @@ void qemu_spice_display_init(void) if (qemu_spice_have_display_interface(con)) { continue; } + if (spice_con != NULL && spice_con != con) { + continue; + } qemu_spice_display_init_one(con); } } @@ -3181,6 +3181,7 @@ static void vnc_display_close(VncDisplay *vd) g_free(vd->lsock); g_free(vd->lsock_tag); vd->lsock = NULL; + vd->lsock_tag = NULL; vd->nlsock = 0; for (i = 0; i < vd->nlwebsock; i++) { @@ -3192,6 +3193,7 @@ static void vnc_display_close(VncDisplay *vd) g_free(vd->lwebsock); g_free(vd->lwebsock_tag); vd->lwebsock = NULL; + vd->lwebsock_tag = NULL; vd->nlwebsock = 0; vd->auth = VNC_AUTH_INVALID; @@ -3204,6 +3206,7 @@ static void vnc_display_close(VncDisplay *vd) vd->tlsaclname = NULL; if (vd->lock_key_sync) { qemu_remove_led_event_handler(vd->led); + vd->led = NULL; } } diff --git a/util/cutils.c b/util/cutils.c index 4fefcf3be3..50ad179dc5 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -181,19 +181,19 @@ int fcntl_setfl(int fd, int flag) static int64_t suffix_mul(char suffix, int64_t unit) { switch (qemu_toupper(suffix)) { - case QEMU_STRTOSZ_DEFSUFFIX_B: + case 'B': return 1; - case QEMU_STRTOSZ_DEFSUFFIX_KB: + case 'K': return unit; - case QEMU_STRTOSZ_DEFSUFFIX_MB: + case 'M': return unit * unit; - case QEMU_STRTOSZ_DEFSUFFIX_GB: + case 'G': return unit * unit * unit; - case QEMU_STRTOSZ_DEFSUFFIX_TB: + case 'T': return unit * unit * unit * unit; - case QEMU_STRTOSZ_DEFSUFFIX_PB: + case 'P': return unit * unit * unit * unit * unit; - case QEMU_STRTOSZ_DEFSUFFIX_EB: + case 'E': return unit * unit * unit * unit * unit * unit; } return -1; @@ -205,10 +205,11 @@ static int64_t suffix_mul(char suffix, int64_t unit) * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on * other error. 
*/ -int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, - const char default_suffix, int64_t unit) +static int do_strtosz(const char *nptr, char **end, + const char default_suffix, int64_t unit, + uint64_t *result) { - int64_t retval = -EINVAL; + int retval; char *endptr; unsigned char c; int mul_required = 0; @@ -217,7 +218,8 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, errno = 0; val = strtod(nptr, &endptr); if (isnan(val) || endptr == nptr || errno != 0) { - goto fail; + retval = -EINVAL; + goto out; } fraction = modf(val, &integral); if (fraction != 0) { @@ -232,181 +234,204 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, assert(mul >= 0); } if (mul == 1 && mul_required) { - goto fail; + retval = -EINVAL; + goto out; } - if ((val * mul >= INT64_MAX) || val < 0) { + /* + * Values >= 0xfffffffffffffc00 overflow uint64_t after their trip + * through double (53 bits of precision). + */ + if ((val * mul >= 0xfffffffffffffc00) || val < 0) { retval = -ERANGE; - goto fail; + goto out; } - retval = val * mul; + *result = val * mul; + retval = 0; -fail: +out: if (end) { *end = endptr; + } else if (*endptr) { + retval = -EINVAL; } return retval; } -int64_t qemu_strtosz_suffix(const char *nptr, char **end, - const char default_suffix) +int qemu_strtosz(const char *nptr, char **end, uint64_t *result) +{ + return do_strtosz(nptr, end, 'B', 1024, result); +} + +int qemu_strtosz_MiB(const char *nptr, char **end, uint64_t *result) { - return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024); + return do_strtosz(nptr, end, 'M', 1024, result); } -int64_t qemu_strtosz(const char *nptr, char **end) +int qemu_strtosz_metric(const char *nptr, char **end, uint64_t *result) { - return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB); + return do_strtosz(nptr, end, 'B', 1000, result); } /** - * Helper function for qemu_strto*l() functions. + * Helper function for error checking after strtol() and the like */ -static int check_strtox_error(const char *p, char *endptr, const char **next, - int err) +static int check_strtox_error(const char *nptr, char *ep, + const char **endptr, int libc_errno) { - /* If no conversion was performed, prefer BSD behavior over glibc - * behavior. - */ - if (err == 0 && endptr == p) { - err = EINVAL; + if (endptr) { + *endptr = ep; } - if (!next && *endptr) { + + /* Turn "no conversion" into an error */ + if (libc_errno == 0 && ep == nptr) { return -EINVAL; } - if (next) { - *next = endptr; + + /* Fail when we're expected to consume the string, but didn't */ + if (!endptr && *ep) { + return -EINVAL; } - return -err; + + return -libc_errno; } /** - * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions. - * - * Convert ASCII string @nptr to a long integer value - * from the given @base. Parameters @nptr, @endptr, @base - * follows same semantics as strtol() C function. - * - * Unlike from strtol() function, if @endptr is not NULL, this - * function will return -EINVAL whenever it cannot fully convert - * the string in @nptr with given @base to a long. This function returns - * the result of the conversion only through the @result parameter. - * - * If NULL is passed in @endptr, then the whole string in @ntpr - * is a number otherwise it returns -EINVAL. - * - * RETURN VALUE - * Unlike from strtol() function, this wrapper returns either - * -EINVAL or the errno set by strtol() function (e.g -ERANGE). 
- * If the conversion overflows, -ERANGE is returned, and @result - * is set to the max value of the desired type - * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case - * of underflow, -ERANGE is returned, and @result is set to the min - * value of the desired type. For strtol(), strtoll(), @result is set to - * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it - * is set to 0. + * Convert string @nptr to a long integer, and store it in @result. + * + * This is a wrapper around strtol() that is harder to misuse. + * Semantics of @nptr, @endptr, @base match strtol() with differences + * noted below. + * + * @nptr may be null, and no conversion is performed then. + * + * If no conversion is performed, store @nptr in *@endptr and return + * -EINVAL. + * + * If @endptr is null, and the string isn't fully converted, return + * -EINVAL. This is the case when the pointer that would be stored in + * a non-null @endptr points to a character other than '\0'. + * + * If the conversion overflows @result, store LONG_MAX in @result, + * and return -ERANGE. + * + * If the conversion underflows @result, store LONG_MIN in @result, + * and return -ERANGE. + * + * Else store the converted value in @result, and return zero. */ int qemu_strtol(const char *nptr, const char **endptr, int base, long *result) { - char *p; - int err = 0; + char *ep; + if (!nptr) { if (endptr) { *endptr = nptr; } - err = -EINVAL; - } else { - errno = 0; - *result = strtol(nptr, &p, base); - err = check_strtox_error(nptr, p, endptr, errno); + return -EINVAL; } - return err; + + errno = 0; + *result = strtol(nptr, &ep, base); + return check_strtox_error(nptr, ep, endptr, errno); } /** - * Converts ASCII string to an unsigned long integer. + * Convert string @nptr to an unsigned long, and store it in @result. + * + * This is a wrapper around strtoul() that is harder to misuse. + * Semantics of @nptr, @endptr, @base match strtoul() with differences + * noted below. + * + * @nptr may be null, and no conversion is performed then. * - * If string contains a negative number, value will be converted to - * the unsigned representation of the signed value, unless the original - * (nonnegated) value would overflow, in this case, it will set @result - * to ULONG_MAX, and return ERANGE. + * If no conversion is performed, store @nptr in *@endptr and return + * -EINVAL. * - * The same behavior holds, for qemu_strtoull() but sets @result to - * ULLONG_MAX instead of ULONG_MAX. + * If @endptr is null, and the string isn't fully converted, return + * -EINVAL. This is the case when the pointer that would be stored in + * a non-null @endptr points to a character other than '\0'. * - * See qemu_strtol() documentation for more info. + * If the conversion overflows @result, store ULONG_MAX in @result, + * and return -ERANGE. + * + * Else store the converted value in @result, and return zero. + * + * Note that a number with a leading minus sign gets converted without + * the minus sign, checked for overflow (see above), then negated (in + * @result's type). This is exactly how strtoul() works. */ int qemu_strtoul(const char *nptr, const char **endptr, int base, unsigned long *result) { - char *p; - int err = 0; + char *ep; + if (!nptr) { if (endptr) { *endptr = nptr; } - err = -EINVAL; - } else { - errno = 0; - *result = strtoul(nptr, &p, base); - /* Windows returns 1 for negative out-of-range values. 
*/ - if (errno == ERANGE) { - *result = -1; - } - err = check_strtox_error(nptr, p, endptr, errno); + return -EINVAL; + } + + errno = 0; + *result = strtoul(nptr, &ep, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; } - return err; + return check_strtox_error(nptr, ep, endptr, errno); } /** - * Converts ASCII string to a long long integer. + * Convert string @nptr to an int64_t. * - * See qemu_strtol() documentation for more info. + * Works like qemu_strtol(), except it stores INT64_MAX on overflow, + * and INT_MIN on underflow. */ -int qemu_strtoll(const char *nptr, const char **endptr, int base, +int qemu_strtoi64(const char *nptr, const char **endptr, int base, int64_t *result) { - char *p; - int err = 0; + char *ep; + if (!nptr) { if (endptr) { *endptr = nptr; } - err = -EINVAL; - } else { - errno = 0; - *result = strtoll(nptr, &p, base); - err = check_strtox_error(nptr, p, endptr, errno); + return -EINVAL; } - return err; + + errno = 0; + /* FIXME This assumes int64_t is long long */ + *result = strtoll(nptr, &ep, base); + return check_strtox_error(nptr, ep, endptr, errno); } /** - * Converts ASCII string to an unsigned long long integer. + * Convert string @nptr to an uint64_t. * - * See qemu_strtol() documentation for more info. + * Works like qemu_strtoul(), except it stores UINT64_MAX on overflow. */ -int qemu_strtoull(const char *nptr, const char **endptr, int base, +int qemu_strtou64(const char *nptr, const char **endptr, int base, uint64_t *result) { - char *p; - int err = 0; + char *ep; + if (!nptr) { if (endptr) { *endptr = nptr; } - err = -EINVAL; - } else { - errno = 0; - *result = strtoull(nptr, &p, base); - /* Windows returns 1 for negative out-of-range values. */ - if (errno == ERANGE) { - *result = -1; - } - err = check_strtox_error(nptr, p, endptr, errno); + return -EINVAL; + } + + errno = 0; + /* FIXME This assumes uint64_t is unsigned long long */ + *result = strtoull(nptr, &ep, base); + /* Windows returns 1 for negative out-of-range values. 
*/ + if (errno == ERANGE) { + *result = -1; } - return err; + return check_strtox_error(nptr, ep, endptr, errno); } /** diff --git a/util/log.c b/util/log.c index e077340ae1..96f30dd21a 100644 --- a/util/log.c +++ b/util/log.c @@ -183,13 +183,13 @@ void qemu_set_dfilter_ranges(const char *filter_spec, Error **errp) goto out; } - if (qemu_strtoull(r, &e, 0, &r1val) + if (qemu_strtou64(r, &e, 0, &r1val) || e != range_op) { error_setg(errp, "Invalid number to the left of %.*s", (int)(r2 - range_op), range_op); goto out; } - if (qemu_strtoull(r2, NULL, 0, &r2val)) { + if (qemu_strtou64(r2, NULL, 0, &r2val)) { error_setg(errp, "Invalid number to the right of %.*s", (int)(r2 - range_op), range_op); goto out; diff --git a/util/qemu-option.c b/util/qemu-option.c index d611946333..5ce1b5c246 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -128,36 +128,33 @@ int get_param_value(char *buf, int buf_size, static void parse_option_bool(const char *name, const char *value, bool *ret, Error **errp) { - if (value != NULL) { - if (!strcmp(value, "on")) { - *ret = 1; - } else if (!strcmp(value, "off")) { - *ret = 0; - } else { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, - name, "'on' or 'off'"); - } - } else { + if (!strcmp(value, "on")) { *ret = 1; + } else if (!strcmp(value, "off")) { + *ret = 0; + } else { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + name, "'on' or 'off'"); } } static void parse_option_number(const char *name, const char *value, uint64_t *ret, Error **errp) { - char *postfix; uint64_t number; + int err; - if (value != NULL) { - number = strtoull(value, &postfix, 0); - if (*postfix != '\0') { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); - return; - } - *ret = number; - } else { + err = qemu_strtou64(value, NULL, 0, &number); + if (err == -ERANGE) { + error_setg(errp, "Value '%s' is too large for parameter '%s'", + value, name); + return; + } + if (err) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); + return; } + *ret = number; } static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, @@ -177,43 +174,24 @@ static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, void parse_option_size(const char *name, const char *value, uint64_t *ret, Error **errp) { - char *postfix; - double sizef; - - if (value != NULL) { - sizef = strtod(value, &postfix); - if (sizef < 0 || sizef > UINT64_MAX) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, - "a non-negative number below 2^64"); - return; - } - switch (*postfix) { - case 'T': - sizef *= 1024; - /* fall through */ - case 'G': - sizef *= 1024; - /* fall through */ - case 'M': - sizef *= 1024; - /* fall through */ - case 'K': - case 'k': - sizef *= 1024; - /* fall through */ - case 'b': - case '\0': - *ret = (uint64_t) sizef; - break; - default: - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); - error_append_hint(errp, "You may use k, M, G or T suffixes for " - "kilobytes, megabytes, gigabytes and terabytes.\n"); - return; - } - } else { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); + uint64_t size; + int err; + + err = qemu_strtosz(value, NULL, &size); + if (err == -ERANGE) { + error_setg(errp, "Value '%s' is out of range for parameter '%s'", + value, name); + return; + } + if (err) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, + "a non-negative number below 2^64"); + error_append_hint(errp, "Optional suffix k, M, G, T, P or E means" + " kilo-, mega-, giga-, tera-, peta-\n" + "and exabytes, respectively.\n"); 
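(Illustrative aside, not part of the diff.) parse_option_size() now delegates to the reworked qemu_strtosz(), which reports errors as 0/-EINVAL/-ERANGE and stores the value through a uint64_t out-parameter instead of overloading the return value. A hedged, stand-alone sketch of the new calling convention; the helper name parse_cache_size and the header set are assumptions for illustration:

    /* Sketch only: parse a size string with the new qemu_strtosz()
     * signature (base-1024 suffixes, 'B' as the default suffix). */
    #include "qemu/osdep.h"
    #include "qemu/cutils.h"

    static bool parse_cache_size(const char *str, uint64_t *bytes)
    {
        int err = qemu_strtosz(str, NULL, bytes);   /* e.g. "1.5G" or "4096" */

        if (err == -ERANGE) {
            return false;   /* value does not fit in 64 bits */
        }
        return err == 0;    /* -EINVAL: not a number, or trailing junk */
    }

Passing NULL as the @end argument makes trailing garbage an error, matching how parse_option_size() uses it above.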
+ return; } + *ret = size; } bool has_help_option(const char *param) @@ -566,6 +544,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value, } opt->desc = desc; opt->str = g_strdup(value); + assert(opt->str); qemu_opt_parse(opt, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/util/qemu-timer.c b/util/qemu-timer.c index ff620ecff7..6cf70b96f6 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -355,11 +355,6 @@ void timer_deinit(QEMUTimer *ts) ts->timer_list = NULL; } -void timer_free(QEMUTimer *ts) -{ - g_free(ts); -} - static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) { QEMUTimer **pt, *t; @@ -300,6 +300,26 @@ static QemuOptsList qemu_machine_opts = { }, }; +static QemuOptsList qemu_accel_opts = { + .name = "accel", + .implied_opt_name = "accel", + .head = QTAILQ_HEAD_INITIALIZER(qemu_accel_opts.head), + .merge_lists = true, + .desc = { + { + .name = "accel", + .type = QEMU_OPT_STRING, + .help = "Select the type of accelerator", + }, + { + .name = "thread", + .type = QEMU_OPT_STRING, + .help = "Enable/disable multi-threaded TCG", + }, + { /* end of list */ } + }, +}; + static QemuOptsList qemu_boot_opts = { .name = "boot-opts", .implied_opt_name = "order", @@ -1492,7 +1512,7 @@ MachineInfoList *qmp_query_machines(Error **errp) info->name = g_strdup(mc->name); info->cpu_max = !mc->max_cpus ? 1 : mc->max_cpus; - info->hotpluggable_cpus = !!mc->query_hotpluggable_cpus; + info->hotpluggable_cpus = mc->has_hotpluggable_cpus; entry = g_malloc0(sizeof(*entry)); entry->value = info; @@ -2928,7 +2948,8 @@ int main(int argc, char **argv, char **envp) const char *boot_once = NULL; DisplayState *ds; int cyls, heads, secs, translation; - QemuOpts *hda_opts = NULL, *opts, *machine_opts, *icount_opts = NULL; + QemuOpts *opts, *machine_opts; + QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; QemuOptsList *olist; int optind; const char *optarg; @@ -2983,6 +3004,7 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_trace_opts); qemu_add_opts(&qemu_option_rom_opts); qemu_add_opts(&qemu_machine_opts); + qemu_add_opts(&qemu_accel_opts); qemu_add_opts(&qemu_mem_opts); qemu_add_opts(&qemu_smp_opts); qemu_add_opts(&qemu_boot_opts); @@ -3675,6 +3697,26 @@ int main(int argc, char **argv, char **envp) qdev_prop_register_global(&kvm_pit_lost_tick_policy); break; } + case QEMU_OPTION_accel: + accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"), + optarg, true); + optarg = qemu_opt_get(accel_opts, "accel"); + + olist = qemu_find_opts("machine"); + if (strcmp("kvm", optarg) == 0) { + qemu_opts_parse_noisily(olist, "accel=kvm", false); + } else if (strcmp("xen", optarg) == 0) { + qemu_opts_parse_noisily(olist, "accel=xen", false); + } else if (strcmp("tcg", optarg) == 0) { + qemu_opts_parse_noisily(olist, "accel=tcg", false); + } else { + if (!is_help_option(optarg)) { + error_printf("Unknown accelerator: %s", optarg); + } + error_printf("Supported accelerators: kvm, xen, tcg\n"); + exit(1); + } + break; case QEMU_OPTION_usb: olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "usb=on", false); @@ -3983,6 +4025,8 @@ int main(int argc, char **argv, char **envp) replay_configure(icount_opts); + qemu_tcg_configure(accel_opts, &error_fatal); + machine_class = select_machine(); set_memory_options(&ram_slots, &maxram_size, machine_class); @@ -4349,6 +4393,9 @@ int main(int argc, char **argv, char **envp) if (!tcg_enabled()) { error_report("-icount is not allowed with hardware virtualization"); 
exit(1); + } else if (qemu_tcg_mttcg_enabled()) { + error_report("-icount does not currently work with MTTCG"); + exit(1); + } configure_icount(icount_opts, &error_abort); qemu_opts_del(icount_opts);
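(Illustrative aside, not part of the diff.) Earlier in this patch, util/cutils.c reworks the qemu_strtol() family and renames qemu_strtoll()/qemu_strtoull() to qemu_strtoi64()/qemu_strtou64(): errors come back as negative errno values, a null @endptr demands that the whole string converts, and overflow clamps the result as described in the new comments. A stand-alone sketch of the documented calling conventions; the function name and the "1000/s"-style input are invented for illustration:

    /* Sketch only: typical use of the reworked integer parsers,
     * assuming qemu/cutils.h from this tree. */
    #include "qemu/osdep.h"
    #include "qemu/cutils.h"

    static int parse_port_and_limit(const char *port_str, const char *limit_str,
                                    long *port, uint64_t *limit)
    {
        const char *end;
        int err;

        /* Whole string must be a number: pass NULL for @endptr. */
        err = qemu_strtol(port_str, NULL, 10, port);
        if (err) {
            return err;             /* -EINVAL or -ERANGE */
        }

        /* Partial parse: stop at the first non-digit, e.g. "1000/s". */
        err = qemu_strtou64(limit_str, &end, 10, limit);
        if (err) {
            return err;
        }
        return (*end == '\0' || *end == '/') ? 0 : -EINVAL;
    }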