-rw-r--r--  MAINTAINERS | 1
-rw-r--r--  Makefile | 2
-rw-r--r--  backends/cryptodev-vhost-user.c | 18
-rw-r--r--  block.c | 440
-rw-r--r--  block/commit.c | 16
-rw-r--r--  block/dirty-bitmap.c | 174
-rw-r--r--  block/file-posix.c | 254
-rw-r--r--  block/gluster.c | 10
-rw-r--r--  block/mirror.c | 8
-rw-r--r--  block/qapi.c | 12
-rw-r--r--  block/qcow2-bitmap.c | 170
-rw-r--r--  block/qcow2.c | 29
-rw-r--r--  block/qcow2.h | 1
-rw-r--r--  block/raw-format.c | 3
-rw-r--r--  block/replication.c | 7
-rw-r--r--  block/stream.c | 21
-rw-r--r--  blockdev.c | 116
-rwxr-xr-x  configure | 102
-rw-r--r--  contrib/libvhost-user/libvhost-user-glib.c | 11
-rw-r--r--  contrib/libvhost-user/libvhost-user-glib.h | 3
-rw-r--r--  contrib/libvhost-user/libvhost-user.c | 468
-rw-r--r--  contrib/libvhost-user/libvhost-user.h | 86
-rw-r--r--  docs/interop/qcow2.txt | 9
-rw-r--r--  docs/interop/vhost-user.json | 232
-rw-r--r--  docs/interop/vhost-user.txt | 386
-rw-r--r--  hw/acpi/ich9.c | 15
-rw-r--r--  hw/acpi/nvdimm.c | 8
-rw-r--r--  hw/acpi/piix4.c | 13
-rw-r--r--  hw/block/nvme.c | 6
-rw-r--r--  hw/block/vhost-user-blk.c | 50
-rw-r--r--  hw/i386/intel_iommu.c | 559
-rw-r--r--  hw/i386/intel_iommu_internal.h | 54
-rw-r--r--  hw/i386/pc.c | 2
-rw-r--r--  hw/i386/trace-events | 2
-rw-r--r--  hw/isa/lpc_ich9.c | 1
-rw-r--r--  hw/pci-bridge/gen_pcie_root_port.c | 4
-rw-r--r--  hw/pci-bridge/pcie_root_port.c | 4
-rw-r--r--  hw/pci/pcie.c | 38
-rw-r--r--  hw/scsi/vhost-user-scsi.c | 20
-rw-r--r--  hw/virtio/vhost-stub.c | 4
-rw-r--r--  hw/virtio/vhost-user.c | 163
-rw-r--r--  hw/virtio/vhost.c | 96
-rw-r--r--  hw/virtio/virtio-balloon.c | 78
-rw-r--r--  include/block/block.h | 13
-rw-r--r--  include/block/block_int.h | 14
-rw-r--r--  include/block/dirty-bitmap.h | 24
-rw-r--r--  include/hw/acpi/ich9.h | 2
-rw-r--r--  include/hw/i386/intel_iommu.h | 28
-rw-r--r--  include/hw/pci/pcie.h | 6
-rw-r--r--  include/hw/pci/pcie_port.h | 1
-rw-r--r--  include/hw/pci/pcie_regs.h | 4
-rw-r--r--  include/hw/virtio/vhost-backend.h | 10
-rw-r--r--  include/hw/virtio/vhost-user-blk.h | 3
-rw-r--r--  include/hw/virtio/vhost-user-scsi.h | 2
-rw-r--r--  include/hw/virtio/vhost-user.h | 2
-rw-r--r--  include/hw/virtio/vhost.h | 18
-rw-r--r--  migration/block-dirty-bitmap.c | 23
-rw-r--r--  nbd/server.c | 13
-rw-r--r--  net/vhost-user.c | 13
-rw-r--r--  qapi/block-core.json | 94
-rw-r--r--  qapi/ui.json | 14
-rw-r--r--  qemu-deprecated.texi | 6
-rw-r--r--  qemu-io-cmds.c | 4
-rw-r--r--  qemu-options.hx | 5
-rw-r--r--  target/riscv/Makefile.objs | 19
-rw-r--r--  target/riscv/insn16.decode | 129
-rw-r--r--  target/riscv/insn32-64.decode | 72
-rw-r--r--  target/riscv/insn32.decode | 201
-rw-r--r--  target/riscv/insn_trans/trans_privileged.inc.c | 110
-rw-r--r--  target/riscv/insn_trans/trans_rva.inc.c | 218
-rw-r--r--  target/riscv/insn_trans/trans_rvc.inc.c | 327
-rw-r--r--  target/riscv/insn_trans/trans_rvd.inc.c | 442
-rw-r--r--  target/riscv/insn_trans/trans_rvf.inc.c | 439
-rw-r--r--  target/riscv/insn_trans/trans_rvi.inc.c | 568
-rw-r--r--  target/riscv/insn_trans/trans_rvm.inc.c | 120
-rw-r--r--  target/riscv/translate.c | 1835
-rwxr-xr-x  tests/qemu-iotests/051 | 7
-rw-r--r--  tests/qemu-iotests/051.out | 9
-rw-r--r--  tests/qemu-iotests/051.pc.out | 9
-rwxr-xr-x  tests/qemu-iotests/124 | 113
-rw-r--r--  tests/qemu-iotests/124.out | 4
-rwxr-xr-x  tests/qemu-iotests/232 | 31
-rw-r--r--  tests/qemu-iotests/232.out | 32
-rw-r--r--  tests/qemu-iotests/236.out | 28
-rw-r--r--  tests/qemu-iotests/245 | 991
-rw-r--r--  tests/qemu-iotests/245.out | 5
-rwxr-xr-x  tests/qemu-iotests/246 | 114
-rw-r--r--  tests/qemu-iotests/246.out | 295
-rw-r--r--  tests/qemu-iotests/group | 2
-rw-r--r--  tests/virtio-blk-test.c | 2
-rw-r--r--  ui/Makefile.objs | 4
-rw-r--r--  ui/curses.c | 315
-rw-r--r--  vl.c | 2
93 files changed, 8163 insertions, 2275 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 564b8dba6e..0e7baa9aa2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1455,6 +1455,7 @@ vhost
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/*/*vhost*
+F: docs/interop/vhost-user.json
F: docs/interop/vhost-user.txt
F: contrib/vhost-user-*/
diff --git a/Makefile b/Makefile
index 6ccb8639b0..abd78a9826 100644
--- a/Makefile
+++ b/Makefile
@@ -497,7 +497,7 @@ Makefile: $(version-obj-y)
# Build libraries
libqemuutil.a: $(util-obj-y) $(trace-obj-y) $(stub-obj-y)
-libvhost-user.a: $(libvhost-user-obj-y)
+libvhost-user.a: $(libvhost-user-obj-y) $(util-obj-y) $(stub-obj-y)
######################################################################
diff --git a/backends/cryptodev-vhost-user.c b/backends/cryptodev-vhost-user.c
index d539f14d59..1052a5d0e9 100644
--- a/backends/cryptodev-vhost-user.c
+++ b/backends/cryptodev-vhost-user.c
@@ -47,7 +47,7 @@
typedef struct CryptoDevBackendVhostUser {
CryptoDevBackend parent_obj;
- VhostUserState *vhost_user;
+ VhostUserState vhost_user;
CharBackend chr;
char *chr_name;
bool opened;
@@ -104,7 +104,7 @@ cryptodev_vhost_user_start(int queues,
continue;
}
- options.opaque = s->vhost_user;
+ options.opaque = &s->vhost_user;
options.backend_type = VHOST_BACKEND_TYPE_USER;
options.cc = b->conf.peers.ccs[i];
s->vhost_crypto[i] = cryptodev_vhost_init(&options);
@@ -182,7 +182,6 @@ static void cryptodev_vhost_user_init(
size_t i;
Error *local_err = NULL;
Chardev *chr;
- VhostUserState *user;
CryptoDevBackendClient *cc;
CryptoDevBackendVhostUser *s =
CRYPTODEV_BACKEND_VHOST_USER(backend);
@@ -213,15 +212,10 @@ static void cryptodev_vhost_user_init(
}
}
- user = vhost_user_init();
- if (!user) {
- error_setg(errp, "Failed to init vhost_user");
+ if (!vhost_user_init(&s->vhost_user, &s->chr, errp)) {
return;
}
- user->chr = &s->chr;
- s->vhost_user = user;
-
qemu_chr_fe_set_handlers(&s->chr, NULL, NULL,
cryptodev_vhost_user_event, NULL, s, NULL, true);
@@ -307,11 +301,7 @@ static void cryptodev_vhost_user_cleanup(
}
}
- if (s->vhost_user) {
- vhost_user_cleanup(s->vhost_user);
- g_free(s->vhost_user);
- s->vhost_user = NULL;
- }
+ vhost_user_cleanup(&s->vhost_user);
}
static void cryptodev_vhost_user_set_chardev(Object *obj,
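The hunks above change vhost_user_init() from heap-allocating a VhostUserState to initialising one embedded in the backend object, so cleanup needs no NULL check and no g_free(). Below is a minimal, self-contained sketch of that ownership pattern (not QEMU code; all names are illustrative):

#include <stdbool.h>
#include <stdio.h>

typedef struct State {
    int connected;
} State;

typedef struct Backend {
    State state;                  /* embedded: lifetime tied to Backend */
} Backend;

static bool state_init(State *s)
{
    s->connected = 1;             /* QEMU would wire up the chardev here */
    return true;                  /* false would mean "error set in errp" */
}

static void state_cleanup(State *s)
{
    s->connected = 0;             /* safe to call unconditionally */
}

int main(void)
{
    Backend b = {0};

    if (!state_init(&b.state)) {
        return 1;
    }
    state_cleanup(&b.state);      /* no NULL check, no free needed */
    printf("ok\n");
    return 0;
}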
diff --git a/block.c b/block.c
index ccf008c177..ed9253c786 100644
--- a/block.c
+++ b/block.c
@@ -1163,13 +1163,6 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
*/
open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
- /*
- * Snapshots should be writable.
- */
- if (flags & BDRV_O_TEMPORARY) {
- open_flags |= BDRV_O_RDWR;
- }
-
return open_flags;
}
@@ -1698,6 +1691,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
typedef struct BlockReopenQueueEntry {
bool prepared;
+ bool perms_checked;
BDRVReopenState state;
QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
@@ -2132,6 +2126,8 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *old_bs = child->bs;
int i;
+ assert(!child->frozen);
+
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
@@ -2348,6 +2344,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
bdrv_inherits_from_recursive(backing_hd, bs);
+ if (bdrv_is_backing_chain_frozen(bs, backing_bs(bs), errp)) {
+ return;
+ }
+
if (backing_hd) {
bdrv_ref(backing_hd);
}
@@ -2983,6 +2983,74 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
NULL, errp);
}
+/* Return true if the NULL-terminated @list contains @str */
+static bool is_str_in_list(const char *str, const char *const *list)
+{
+ if (str && list) {
+ int i;
+ for (i = 0; list[i] != NULL; i++) {
+ if (!strcmp(str, list[i])) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/*
+ * Check that every option set in @bs->options is also set in
+ * @new_opts.
+ *
+ * Options listed in the common_options list and in
+ * @bs->drv->mutable_opts are skipped.
+ *
+ * Return 0 on success, otherwise return -EINVAL and set @errp.
+ */
+static int bdrv_reset_options_allowed(BlockDriverState *bs,
+ const QDict *new_opts, Error **errp)
+{
+ const QDictEntry *e;
+ /* These options are common to all block drivers and are handled
+ * in bdrv_reopen_prepare() so they can be left out of @new_opts */
+ const char *const common_options[] = {
+ "node-name", "discard", "cache.direct", "cache.no-flush",
+ "read-only", "auto-read-only", "detect-zeroes", NULL
+ };
+
+ for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+ if (!qdict_haskey(new_opts, e->key) &&
+ !is_str_in_list(e->key, common_options) &&
+ !is_str_in_list(e->key, bs->drv->mutable_opts)) {
+ error_setg(errp, "Option '%s' cannot be reset "
+ "to its default value", e->key);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Returns true if @child can be reached recursively from @bs
+ */
+static bool bdrv_recurse_has_child(BlockDriverState *bs,
+ BlockDriverState *child)
+{
+ BdrvChild *c;
+
+ if (bs == child) {
+ return true;
+ }
+
+ QLIST_FOREACH(c, &bs->children, next) {
+ if (bdrv_recurse_has_child(c->bs, child)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
/*
* Adds a BlockDriverState to a simple queue for an atomic, transactional
* reopen of multiple devices.
@@ -3010,7 +3078,8 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
QDict *options,
const BdrvChildRole *role,
QDict *parent_options,
- int parent_flags)
+ int parent_flags,
+ bool keep_old_opts)
{
assert(bs != NULL);
@@ -3050,13 +3119,13 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
*/
/* Old explicitly set values (don't overwrite by inherited value) */
- if (bs_entry) {
- old_options = qdict_clone_shallow(bs_entry->state.explicit_options);
- } else {
- old_options = qdict_clone_shallow(bs->explicit_options);
+ if (bs_entry || keep_old_opts) {
+ old_options = qdict_clone_shallow(bs_entry ?
+ bs_entry->state.explicit_options :
+ bs->explicit_options);
+ bdrv_join_options(bs, options, old_options);
+ qobject_unref(old_options);
}
- bdrv_join_options(bs, options, old_options);
- qobject_unref(old_options);
explicit_options = qdict_clone_shallow(options);
@@ -3068,10 +3137,12 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
flags = bdrv_get_flags(bs);
}
- /* Old values are used for options that aren't set yet */
- old_options = qdict_clone_shallow(bs->options);
- bdrv_join_options(bs, options, old_options);
- qobject_unref(old_options);
+ if (keep_old_opts) {
+ /* Old values are used for options that aren't set yet */
+ old_options = qdict_clone_shallow(bs->options);
+ bdrv_join_options(bs, options, old_options);
+ qobject_unref(old_options);
+ }
/* We have the final set of options so let's update the flags */
options_copy = qdict_clone_shallow(options);
@@ -3104,9 +3175,21 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
bs_entry->state.perm = UINT64_MAX;
bs_entry->state.shared_perm = 0;
+ /*
+ * If keep_old_opts is false then it means that unspecified
+ * options must be reset to their original value. We don't allow
+ * resetting 'backing' but we need to know if the option is
+ * missing in order to decide if we have to return an error.
+ */
+ if (!keep_old_opts) {
+ bs_entry->state.backing_missing =
+ !qdict_haskey(options, "backing") &&
+ !qdict_haskey(options, "backing.driver");
+ }
+
QLIST_FOREACH(child, &bs->children, next) {
- QDict *new_child_options;
- char *child_key_dot;
+ QDict *new_child_options = NULL;
+ bool child_keep_old = keep_old_opts;
/* reopen can only change the options of block devices that were
* implicitly created and inherited options. For other (referenced)
@@ -3115,13 +3198,32 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
continue;
}
- child_key_dot = g_strdup_printf("%s.", child->name);
- qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
- qdict_extract_subqdict(options, &new_child_options, child_key_dot);
- g_free(child_key_dot);
+ /* Check if the options contain a child reference */
+ if (qdict_haskey(options, child->name)) {
+ const char *childref = qdict_get_try_str(options, child->name);
+ /*
+ * The current child must not be reopened if the child
+ * reference is null or points to a different node.
+ */
+ if (g_strcmp0(childref, child->bs->node_name)) {
+ continue;
+ }
+ /*
+ * If the child reference points to the current child then
+ * reopen it with its existing set of options (note that
+ * it can still inherit new options from the parent).
+ */
+ child_keep_old = true;
+ } else {
+ /* Extract child options ("child-name.*") */
+ char *child_key_dot = g_strdup_printf("%s.", child->name);
+ qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
+ qdict_extract_subqdict(options, &new_child_options, child_key_dot);
+ g_free(child_key_dot);
+ }
bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
- child->role, options, flags);
+ child->role, options, flags, child_keep_old);
}
return bs_queue;
@@ -3129,9 +3231,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
- QDict *options)
+ QDict *options, bool keep_old_opts)
{
- return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0);
+ return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, NULL, 0,
+ keep_old_opts);
}
/*
@@ -3151,23 +3254,44 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
* All affected nodes must be drained between bdrv_reopen_queue() and
* bdrv_reopen_multiple().
*/
-int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
int ret = -1;
BlockReopenQueueEntry *bs_entry, *next;
- Error *local_err = NULL;
assert(bs_queue != NULL);
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
assert(bs_entry->state.bs->quiesce_counter > 0);
- if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
- error_propagate(errp, local_err);
+ if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
goto cleanup;
}
bs_entry->prepared = true;
}
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ BDRVReopenState *state = &bs_entry->state;
+ ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
+ state->shared_perm, NULL, errp);
+ if (ret < 0) {
+ goto cleanup_perm;
+ }
+ /* Check if new_backing_bs would accept the new permissions */
+ if (state->replace_backing_bs && state->new_backing_bs) {
+ uint64_t nperm, nshared;
+ bdrv_child_perm(state->bs, state->new_backing_bs,
+ NULL, &child_backing, bs_queue,
+ state->perm, state->shared_perm,
+ &nperm, &nshared);
+ ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
+ nperm, nshared, NULL, errp);
+ if (ret < 0) {
+ goto cleanup_perm;
+ }
+ }
+ bs_entry->perms_checked = true;
+ }
+
/* If we reach this point, we have success and just need to apply the
* changes
*/
@@ -3176,7 +3300,23 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
}
ret = 0;
+cleanup_perm:
+ QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ BDRVReopenState *state = &bs_entry->state;
+
+ if (!bs_entry->perms_checked) {
+ continue;
+ }
+ if (ret == 0) {
+ bdrv_set_perm(state->bs, state->perm, state->shared_perm);
+ } else {
+ bdrv_abort_perm_update(state->bs);
+ if (state->replace_backing_bs && state->new_backing_bs) {
+ bdrv_abort_perm_update(state->new_backing_bs);
+ }
+ }
+ }
cleanup:
QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
if (ret) {
@@ -3186,6 +3326,9 @@ cleanup:
qobject_unref(bs_entry->state.explicit_options);
qobject_unref(bs_entry->state.options);
}
+ if (bs_entry->state.new_backing_bs) {
+ bdrv_unref(bs_entry->state.new_backing_bs);
+ }
g_free(bs_entry);
}
g_free(bs_queue);
@@ -3203,8 +3346,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
bdrv_subtree_drained_begin(bs);
- queue = bdrv_reopen_queue(NULL, bs, opts);
- ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp);
+ queue = bdrv_reopen_queue(NULL, bs, opts, true);
+ ret = bdrv_reopen_multiple(queue, errp);
bdrv_subtree_drained_end(bs);
return ret;
@@ -3258,6 +3401,101 @@ static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
}
/*
+ * Take a BDRVReopenState and check if the value of 'backing' in the
+ * reopen_state->options QDict is valid or not.
+ *
+ * If 'backing' is missing from the QDict then return 0.
+ *
+ * If 'backing' contains the node name of the backing file of
+ * reopen_state->bs then return 0.
+ *
+ * If 'backing' contains a different node name (or is null) then check
+ * whether the current backing file can be replaced with the new one.
+ * If that's the case then reopen_state->replace_backing_bs is set to
+ * true and reopen_state->new_backing_bs contains a pointer to the new
+ * backing BlockDriverState (or NULL).
+ *
+ * Return 0 on success, otherwise return < 0 and set @errp.
+ */
+static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
+ Error **errp)
+{
+ BlockDriverState *bs = reopen_state->bs;
+ BlockDriverState *overlay_bs, *new_backing_bs;
+ QObject *value;
+ const char *str;
+
+ value = qdict_get(reopen_state->options, "backing");
+ if (value == NULL) {
+ return 0;
+ }
+
+ switch (qobject_type(value)) {
+ case QTYPE_QNULL:
+ new_backing_bs = NULL;
+ break;
+ case QTYPE_QSTRING:
+ str = qobject_get_try_str(value);
+ new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
+ if (new_backing_bs == NULL) {
+ return -EINVAL;
+ } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
+ error_setg(errp, "Making '%s' a backing file of '%s' "
+ "would create a cycle", str, bs->node_name);
+ return -EINVAL;
+ }
+ break;
+ default:
+ /* 'backing' does not allow any other data type */
+ g_assert_not_reached();
+ }
+
+ /*
+ * TODO: before removing the x- prefix from x-blockdev-reopen we
+ * should move the new backing file into the right AioContext
+ * instead of returning an error.
+ */
+ if (new_backing_bs) {
+ if (bdrv_get_aio_context(new_backing_bs) != bdrv_get_aio_context(bs)) {
+ error_setg(errp, "Cannot use a new backing file "
+ "with a different AioContext");
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * Find the "actual" backing file by skipping all links that point
+ * to an implicit node, if any (e.g. a commit filter node).
+ */
+ overlay_bs = bs;
+ while (backing_bs(overlay_bs) && backing_bs(overlay_bs)->implicit) {
+ overlay_bs = backing_bs(overlay_bs);
+ }
+
+ /* If we want to replace the backing file we need some extra checks */
+ if (new_backing_bs != backing_bs(overlay_bs)) {
+ /* Check for implicit nodes between bs and its backing file */
+ if (bs != overlay_bs) {
+ error_setg(errp, "Cannot change backing link if '%s' has "
+ "an implicit backing file", bs->node_name);
+ return -EPERM;
+ }
+ /* Check if the backing link that we want to replace is frozen */
+ if (bdrv_is_backing_chain_frozen(overlay_bs, backing_bs(overlay_bs),
+ errp)) {
+ return -EPERM;
+ }
+ reopen_state->replace_backing_bs = true;
+ if (new_backing_bs) {
+ bdrv_ref(new_backing_bs);
+ reopen_state->new_backing_bs = new_backing_bs;
+ }
+ }
+
+ return 0;
+}
+
+/*
* Prepares a BlockDriverState for reopen. All changes are staged in the
* 'opaque' field of the BDRVReopenState, which is used and allocated by
* the block driver layer .bdrv_reopen_prepare()
@@ -3355,6 +3593,17 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
}
if (drv->bdrv_reopen_prepare) {
+ /*
+ * If a driver-specific option is missing, it means that we
+ * should reset it to its default value.
+ * But not all options allow that, so we need to check it first.
+ */
+ ret = bdrv_reset_options_allowed(reopen_state->bs,
+ reopen_state->options, errp);
+ if (ret) {
+ goto error;
+ }
+
ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
if (ret) {
if (local_err != NULL) {
@@ -3378,6 +3627,30 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
drv_prepared = true;
+ /*
+ * We must provide the 'backing' option if the BDS has a backing
+ * file or if the image file has a backing file name as part of
+ * its metadata. Otherwise the 'backing' option can be omitted.
+ */
+ if (drv->supports_backing && reopen_state->backing_missing &&
+ (backing_bs(reopen_state->bs) || reopen_state->bs->backing_file[0])) {
+ error_setg(errp, "backing is missing for '%s'",
+ reopen_state->bs->node_name);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ /*
+ * Allow changing the 'backing' option. The new value can be
+ * either a reference to an existing node (using its node name)
+ * or NULL to simply detach the current backing file.
+ */
+ ret = bdrv_reopen_parse_backing(reopen_state, errp);
+ if (ret < 0) {
+ goto error;
+ }
+ qdict_del(reopen_state->options, "backing");
+
/* Options that are not handled are only okay if they are unchanged
* compared to the old state. It is expected that some options are only
* used for the initial open, but not reopen (e.g. filename) */
@@ -3430,12 +3703,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
} while ((entry = qdict_next(reopen_state->options, entry)));
}
- ret = bdrv_check_perm(reopen_state->bs, queue, reopen_state->perm,
- reopen_state->shared_perm, NULL, errp);
- if (ret < 0) {
- goto error;
- }
-
ret = 0;
/* Restore the original reopen_state->options QDict */
@@ -3493,6 +3760,11 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bs->detect_zeroes = reopen_state->detect_zeroes;
+ if (reopen_state->replace_backing_bs) {
+ qdict_del(bs->explicit_options, "backing");
+ qdict_del(bs->options, "backing");
+ }
+
/* Remove child references from bs->options and bs->explicit_options.
* Child options were already removed in bdrv_reopen_queue_child() */
QLIST_FOREACH(child, &bs->children, next) {
@@ -3500,10 +3772,22 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
qdict_del(bs->options, child->name);
}
- bdrv_refresh_limits(bs, NULL);
+ /*
+ * Change the backing file if a new one was specified. We do this
+ * after updating bs->options, so bdrv_refresh_filename() (called
+ * from bdrv_set_backing_hd()) has the new values.
+ */
+ if (reopen_state->replace_backing_bs) {
+ BlockDriverState *old_backing_bs = backing_bs(bs);
+ assert(!old_backing_bs || !old_backing_bs->implicit);
+ /* Abort the permission update on the backing bs we're detaching */
+ if (old_backing_bs) {
+ bdrv_abort_perm_update(old_backing_bs);
+ }
+ bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
+ }
- bdrv_set_perm(reopen_state->bs, reopen_state->perm,
- reopen_state->shared_perm);
+ bdrv_refresh_limits(bs, NULL);
new_can_write =
!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
@@ -3536,8 +3820,6 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
if (drv->bdrv_reopen_abort) {
drv->bdrv_reopen_abort(reopen_state);
}
-
- bdrv_abort_perm_update(reopen_state->bs);
}
@@ -3717,6 +3999,11 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
if (!should_update_child(c, to)) {
continue;
}
+ if (c->frozen) {
+ error_setg(errp, "Cannot change '%s' link to '%s'",
+ c->name, from->node_name);
+ goto out;
+ }
list = g_slist_prepend(list, c);
perm |= c->perm;
shared &= c->shared_perm;
@@ -3929,6 +4216,63 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
}
/*
+ * Return true if at least one of the backing links between @bs and
+ * @base is frozen. @errp is set if that's the case.
+ */
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp)
+{
+ BlockDriverState *i;
+
+ for (i = bs; i != base && i->backing; i = backing_bs(i)) {
+ if (i->backing->frozen) {
+ error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
+ i->backing->name, i->node_name,
+ backing_bs(i)->node_name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Freeze all backing links between @bs and @base.
+ * If any of the links is already frozen the operation is aborted and
+ * none of the links are modified.
+ * Returns 0 on success. On failure returns < 0 and sets @errp.
+ */
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp)
+{
+ BlockDriverState *i;
+
+ if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
+ return -EPERM;
+ }
+
+ for (i = bs; i != base && i->backing; i = backing_bs(i)) {
+ i->backing->frozen = true;
+ }
+
+ return 0;
+}
+
+/*
+ * Unfreeze all backing links between @bs and @base. The caller must
+ * ensure that all links are frozen before using this function.
+ */
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
+{
+ BlockDriverState *i;
+
+ for (i = bs; i != base && i->backing; i = backing_bs(i)) {
+ assert(i->backing->frozen);
+ i->backing->frozen = false;
+ }
+}
+
+/*
* Drops images above 'base' up to and including 'top', and sets the image
* above 'top' to have base as its backing file.
*
@@ -3977,6 +4321,14 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
goto exit;
}
+ /* This function changes all links that point to top and makes
+ * them point to base. Check that none of them is frozen. */
+ QLIST_FOREACH(c, &top->parents, next_parent) {
+ if (c->frozen) {
+ goto exit;
+ }
+ }
+
/* If 'base' recursively inherits from 'top' then we should set
* base->inherits_from to top->inherits_from after 'top' and all
* other intermediate nodes have been dropped.
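bdrv_reopen_parse_backing() above relies on bdrv_recurse_has_child() to reject a new backing file that would create a cycle in the node graph. The following standalone sketch models that reachability test with simplified stand-in types (Node instead of BlockDriverState):

#include <stdbool.h>
#include <stdio.h>

#define MAX_CHILDREN 4

typedef struct Node {
    const char *name;
    struct Node *children[MAX_CHILDREN];
    int nb_children;
} Node;

/* Return true if @child can be reached recursively from @bs */
static bool recurse_has_child(Node *bs, Node *child)
{
    if (bs == child) {
        return true;
    }
    for (int i = 0; i < bs->nb_children; i++) {
        if (recurse_has_child(bs->children[i], child)) {
            return true;
        }
    }
    return false;
}

int main(void)
{
    Node base = { "base", { 0 }, 0 };
    Node top  = { "top", { &base }, 1 };

    /* base is reachable from top, so making top the backing file of
     * base would create a cycle and must be rejected: */
    printf("reject: %d\n", recurse_has_child(&top, &base));  /* 1 */

    /* top is not reachable from base, so base may stay (or become)
     * top's backing file: */
    printf("reject: %d\n", recurse_has_child(&base, &top));  /* 0 */
    return 0;
}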
diff --git a/block/commit.c b/block/commit.c
index 3b46ca7f97..ba60fef58a 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -39,6 +39,7 @@ typedef struct CommitBlockJob {
BlockDriverState *base_bs;
BlockdevOnError on_error;
bool base_read_only;
+ bool chain_frozen;
char *backing_file_str;
} CommitBlockJob;
@@ -68,6 +69,9 @@ static int commit_prepare(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+ bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
+ s->chain_frozen = false;
+
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
@@ -84,6 +88,10 @@ static void commit_abort(Job *job)
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
BlockDriverState *top_bs = blk_bs(s->top);
+ if (s->chain_frozen) {
+ bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
+ }
+
/* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
bdrv_ref(top_bs);
bdrv_ref(s->commit_top_bs);
@@ -330,6 +338,11 @@ void commit_start(const char *job_id, BlockDriverState *bs,
}
}
+ if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
+ goto fail;
+ }
+ s->chain_frozen = true;
+
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
if (ret < 0) {
goto fail;
@@ -362,6 +375,9 @@ void commit_start(const char *job_id, BlockDriverState *bs,
return;
fail:
+ if (s->chain_frozen) {
+ bdrv_unfreeze_backing_chain(commit_top_bs, base);
+ }
if (s->base) {
blk_unref(s->base);
}
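The chain_frozen flag added above follows a discipline worth spelling out: freeze the chain when the job is created, and have exactly one completion path (prepare on success, abort on failure) drop the freeze. A compilable toy model of that pattern, with invented names:

#include <stdbool.h>
#include <stdio.h>

typedef struct Job {
    bool chain_frozen;
} Job;

static int chain_freeze(void)
{
    puts("freeze chain");
    return 0;                     /* < 0 would mean "already frozen" */
}

static void chain_unfreeze(void)
{
    puts("unfreeze chain");
}

static int job_start(Job *s)
{
    if (chain_freeze() < 0) {
        return -1;                /* nothing to undo yet */
    }
    s->chain_frozen = true;
    return 0;
}

static void job_prepare(Job *s)   /* success path */
{
    chain_unfreeze();
    s->chain_frozen = false;      /* so a later abort won't unfreeze twice */
}

static void job_abort(Job *s)     /* failure/cancel path */
{
    if (s->chain_frozen) {
        chain_unfreeze();
        s->chain_frozen = false;
    }
}

int main(void)
{
    Job s = {0};

    if (job_start(&s) == 0) {
        job_prepare(&s);
        job_abort(&s);            /* no-op: freeze already dropped */
    }
    return 0;
}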
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index c6d4acebfa..59e6ebb861 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -28,29 +28,12 @@
#include "block/block_int.h"
#include "block/blockjob.h"
-/**
- * A BdrvDirtyBitmap can be in four possible user-visible states:
- * (1) Active: successor is NULL, and disabled is false: full r/w mode
- * (2) Disabled: successor is NULL, and disabled is true: qualified r/w mode,
- * guest writes are dropped, but monitor writes are possible,
- * through commands like merge and clear.
- * (3) Frozen: successor is not NULL.
- * A frozen bitmap cannot be renamed, deleted, cleared, set,
- * enabled, merged to, etc. A frozen bitmap can only abdicate()
- * or reclaim().
- * In this state, the anonymous successor bitmap may be either
- * Active and recording writes from the guest (e.g. backup jobs),
- * but it can be Disabled and not recording writes.
- * (4) Locked: Whether Active or Disabled, the user cannot modify this bitmap
- * in any way from the monitor.
- */
struct BdrvDirtyBitmap {
QemuMutex *mutex;
HBitmap *bitmap; /* Dirty bitmap implementation */
HBitmap *meta; /* Meta dirty bitmap */
- bool qmp_locked; /* Bitmap is locked, it can't be modified
- through QMP */
- BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+ bool busy; /* Bitmap is busy, it can't be used via QMP */
+ BdrvDirtyBitmap *successor; /* Anonymous child, if any. */
char *name; /* Optional non-empty unique ID */
int64_t size; /* Size of the bitmap, in bytes */
bool disabled; /* Bitmap is disabled. It ignores all writes to
@@ -63,6 +46,9 @@ struct BdrvDirtyBitmap {
and this bitmap must remain unchanged while
this flag is set. */
bool persistent; /* bitmap must be saved to owner disk image */
+ bool inconsistent; /* bitmap is persistent, but inconsistent.
+ It cannot be used in any way, except that
+ a QMP user can remove it. */
bool migration; /* Bitmap is selected for migration, it should
not be stored on the next inactivation
(persistent flag doesn't matter until next
@@ -183,41 +169,58 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
}
/* Called with BQL taken. */
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+bool bdrv_dirty_bitmap_has_successor(BdrvDirtyBitmap *bitmap)
{
return bitmap->successor;
}
-/* Both conditions disallow user-modification via QMP. */
-bool bdrv_dirty_bitmap_user_locked(BdrvDirtyBitmap *bitmap) {
- return bdrv_dirty_bitmap_frozen(bitmap) ||
- bdrv_dirty_bitmap_qmp_locked(bitmap);
+static bool bdrv_dirty_bitmap_busy(const BdrvDirtyBitmap *bitmap)
+{
+ return bitmap->busy;
}
-void bdrv_dirty_bitmap_set_qmp_locked(BdrvDirtyBitmap *bitmap, bool qmp_locked)
+void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy)
{
qemu_mutex_lock(bitmap->mutex);
- bitmap->qmp_locked = qmp_locked;
+ bitmap->busy = busy;
qemu_mutex_unlock(bitmap->mutex);
}
-bool bdrv_dirty_bitmap_qmp_locked(BdrvDirtyBitmap *bitmap)
-{
- return bitmap->qmp_locked;
-}
-
/* Called with BQL taken. */
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
{
- return !(bitmap->disabled || bitmap->successor);
+ return !bitmap->disabled;
}
-/* Called with BQL taken. */
+/**
+ * bdrv_dirty_bitmap_status: This API is now deprecated.
+ * Called with BQL taken.
+ *
+ * A BdrvDirtyBitmap can be in five possible user-visible states:
+ * (1) Active: successor is NULL, and disabled is false: full r/w mode
+ * (2) Disabled: successor is NULL, and disabled is true: qualified r/w mode,
+ * guest writes are dropped, but monitor writes are possible,
+ * through commands like merge and clear.
+ * (3) Frozen: successor is not NULL.
+ * A frozen bitmap cannot be renamed, deleted, cleared, set,
+ * enabled, merged to, etc. A frozen bitmap can only abdicate()
+ * or reclaim().
+ * In this state, the anonymous successor bitmap may be either
+ * Active and recording writes from the guest (e.g. backup jobs),
+ * or it can be Disabled and not recording writes.
+ * (4) Locked: Whether Active or Disabled, the user cannot modify this bitmap
+ * in any way from the monitor.
+ * (5) Inconsistent: This is a persistent bitmap whose "in use" bit is set, and
+ * is unusable by QEMU. It can be deleted to remove it from
+ * the qcow2.
+ */
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
{
- if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ if (bdrv_dirty_bitmap_inconsistent(bitmap)) {
+ return DIRTY_BITMAP_STATUS_INCONSISTENT;
+ } else if (bdrv_dirty_bitmap_has_successor(bitmap)) {
return DIRTY_BITMAP_STATUS_FROZEN;
- } else if (bdrv_dirty_bitmap_qmp_locked(bitmap)) {
+ } else if (bdrv_dirty_bitmap_busy(bitmap)) {
return DIRTY_BITMAP_STATUS_LOCKED;
} else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
return DIRTY_BITMAP_STATUS_DISABLED;
@@ -226,9 +229,44 @@ DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
}
}
+/* Called with BQL taken. */
+static bool bdrv_dirty_bitmap_recording(BdrvDirtyBitmap *bitmap)
+{
+ return !bitmap->disabled || (bitmap->successor &&
+ !bitmap->successor->disabled);
+}
+
+int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
+ Error **errp)
+{
+ if ((flags & BDRV_BITMAP_BUSY) && bdrv_dirty_bitmap_busy(bitmap)) {
+ error_setg(errp, "Bitmap '%s' is currently in use by another"
+ " operation and cannot be used", bitmap->name);
+ return -1;
+ }
+
+ if ((flags & BDRV_BITMAP_RO) && bdrv_dirty_bitmap_readonly(bitmap)) {
+ error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
+ bitmap->name);
+ return -1;
+ }
+
+ if ((flags & BDRV_BITMAP_INCONSISTENT) &&
+ bdrv_dirty_bitmap_inconsistent(bitmap)) {
+ error_setg(errp, "Bitmap '%s' is inconsistent and cannot be used",
+ bitmap->name);
+ error_append_hint(errp, "Try block-dirty-bitmap-remove to delete"
+ " this bitmap from disk");
+ return -1;
+ }
+
+ return 0;
+}
+
/**
* Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
+ * Requires that the bitmap is not marked busy and has no successor.
+ * The successor will be enabled if the parent bitmap was.
* Called with BQL taken.
*/
int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
@@ -237,12 +275,14 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
uint64_t granularity;
BdrvDirtyBitmap *child;
- if (bdrv_dirty_bitmap_frozen(bitmap)) {
- error_setg(errp, "Cannot create a successor for a bitmap that is "
- "currently frozen");
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY, errp)) {
+ return -1;
+ }
+ if (bdrv_dirty_bitmap_has_successor(bitmap)) {
+ error_setg(errp, "Cannot create a successor for a bitmap that already "
+ "has one");
return -1;
}
- assert(!bitmap->successor);
/* Create an anonymous successor */
granularity = bdrv_dirty_bitmap_granularity(bitmap);
@@ -253,15 +293,16 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
/* Successor will be on or off based on our current state. */
child->disabled = bitmap->disabled;
+ bitmap->disabled = true;
- /* Install the successor and freeze the parent */
+ /* Install the successor and mark the parent as busy */
bitmap->successor = child;
+ bitmap->busy = true;
return 0;
}
void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap)
{
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = false;
}
@@ -278,7 +319,8 @@ void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap)
static void bdrv_release_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap)
{
assert(!bitmap->active_iterators);
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ assert(!bdrv_dirty_bitmap_busy(bitmap));
+ assert(!bdrv_dirty_bitmap_has_successor(bitmap));
assert(!bitmap->meta);
QLIST_REMOVE(bitmap, list);
hbitmap_free(bitmap->bitmap);
@@ -310,6 +352,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
bitmap->successor = NULL;
successor->persistent = bitmap->persistent;
bitmap->persistent = false;
+ bitmap->busy = false;
bdrv_release_dirty_bitmap(bs, bitmap);
return successor;
@@ -318,7 +361,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
/**
* In cases of failure where we can no longer safely delete the parent,
* we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
+ * The merged parent will be marked as not busy.
+ * The merged parent will be enabled if and only if the successor was enabled.
* Called within bdrv_dirty_bitmap_lock..unlock and with BQL taken.
*/
BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
@@ -336,6 +380,9 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
error_setg(errp, "Merging of parent and successor bitmap failed");
return NULL;
}
+
+ parent->disabled = successor->disabled;
+ parent->busy = false;
bdrv_release_dirty_bitmap_locked(successor);
parent->successor = NULL;
@@ -366,7 +413,8 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
bdrv_dirty_bitmaps_lock(bs);
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ assert(!bdrv_dirty_bitmap_busy(bitmap));
+ assert(!bdrv_dirty_bitmap_has_successor(bitmap));
assert(!bitmap->active_iterators);
hbitmap_truncate(bitmap->bitmap, bytes);
bitmap->size = bytes;
@@ -384,7 +432,7 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
/**
* Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
+ * There must not be any busy bitmaps attached.
* This function does not remove persistent bitmaps from the storage.
* Called with BQL taken.
*/
@@ -421,7 +469,6 @@ void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
{
bdrv_dirty_bitmap_lock(bitmap);
- assert(!bdrv_dirty_bitmap_frozen(bitmap));
bitmap->disabled = true;
bdrv_dirty_bitmap_unlock(bitmap);
}
@@ -448,7 +495,11 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
info->has_name = !!bm->name;
info->name = g_strdup(bm->name);
info->status = bdrv_dirty_bitmap_status(bm);
+ info->recording = bdrv_dirty_bitmap_recording(bm);
+ info->busy = bdrv_dirty_bitmap_busy(bm);
info->persistent = bm->persistent;
+ info->has_inconsistent = bm->inconsistent;
+ info->inconsistent = bm->inconsistent;
entry->value = info;
*plist = entry;
plist = &entry->next;
@@ -531,7 +582,6 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_set(bitmap->bitmap, offset, bytes);
}
@@ -548,7 +598,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
int64_t offset, int64_t bytes)
{
- assert(bdrv_dirty_bitmap_enabled(bitmap));
assert(!bdrv_dirty_bitmap_readonly(bitmap));
hbitmap_reset(bitmap->bitmap, offset, bytes);
}
@@ -691,7 +740,7 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
}
/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
+void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap, bool persistent)
{
qemu_mutex_lock(bitmap->mutex);
bitmap->persistent = persistent;
@@ -699,6 +748,16 @@ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
}
/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap)
+{
+ qemu_mutex_lock(bitmap->mutex);
+ assert(bitmap->persistent == true);
+ bitmap->inconsistent = true;
+ bitmap->disabled = true;
+ qemu_mutex_unlock(bitmap->mutex);
+}
+
+/* Called with BQL taken. */
void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration)
{
qemu_mutex_lock(bitmap->mutex);
@@ -706,11 +765,16 @@ void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration)
qemu_mutex_unlock(bitmap->mutex);
}
-bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
+bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap)
{
return bitmap->persistent && !bitmap->migration;
}
+bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap)
+{
+ return bitmap->inconsistent;
+}
+
bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
{
BdrvDirtyBitmap *bm;
@@ -757,15 +821,11 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
qemu_mutex_lock(dest->mutex);
- if (bdrv_dirty_bitmap_user_locked(dest)) {
- error_setg(errp, "Bitmap '%s' is currently in use by another"
- " operation and cannot be modified", dest->name);
+ if (bdrv_dirty_bitmap_check(dest, BDRV_BITMAP_DEFAULT, errp)) {
goto out;
}
- if (bdrv_dirty_bitmap_readonly(dest)) {
- error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
- dest->name);
+ if (bdrv_dirty_bitmap_check(src, BDRV_BITMAP_ALLOW_RO, errp)) {
goto out;
}
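bdrv_dirty_bitmap_check() above folds the old frozen/qmp_locked/readonly tests into one flag-driven gate: each caller states which conditions disqualify a bitmap, so a merge source may be readonly while a merge destination may not. A self-contained sketch of the same shape, with simplified names:

#include <stdbool.h>
#include <stdio.h>

enum {
    BITMAP_BUSY         = 1 << 0,
    BITMAP_RO           = 1 << 1,
    BITMAP_INCONSISTENT = 1 << 2,
    BITMAP_DEFAULT      = BITMAP_BUSY | BITMAP_RO | BITMAP_INCONSISTENT,
    BITMAP_ALLOW_RO     = BITMAP_BUSY | BITMAP_INCONSISTENT,
};

typedef struct Bitmap {
    const char *name;
    bool busy, readonly, inconsistent;
} Bitmap;

static int bitmap_check(const Bitmap *bm, unsigned flags)
{
    if ((flags & BITMAP_BUSY) && bm->busy) {
        fprintf(stderr, "Bitmap '%s' is in use\n", bm->name);
        return -1;
    }
    if ((flags & BITMAP_RO) && bm->readonly) {
        fprintf(stderr, "Bitmap '%s' is readonly\n", bm->name);
        return -1;
    }
    if ((flags & BITMAP_INCONSISTENT) && bm->inconsistent) {
        fprintf(stderr, "Bitmap '%s' is inconsistent\n", bm->name);
        return -1;
    }
    return 0;
}

int main(void)
{
    Bitmap src = { "src", false, true, false };

    /* A merge may read from a readonly source, so RO is not checked: */
    printf("ok as merge source: %d\n",
           bitmap_check(&src, BITMAP_ALLOW_RO) == 0);
    /* ...but the same bitmap is rejected as a modification target: */
    printf("ok as merge dest:   %d\n",
           bitmap_check(&src, BITMAP_DEFAULT) == 0);
    return 0;
}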
diff --git a/block/file-posix.c b/block/file-posix.c
index ba6ab62a38..e9fa6aac48 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -144,6 +144,9 @@ typedef struct BDRVRawState {
uint64_t locked_perm;
uint64_t locked_shared_perm;
+ int perm_change_fd;
+ BDRVReopenState *reopen_state;
+
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
@@ -373,13 +376,21 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
}
}
-static void raw_parse_flags(int bdrv_flags, int *open_flags)
+static void raw_parse_flags(int bdrv_flags, int *open_flags, bool has_writers)
{
+ bool read_write = false;
assert(open_flags != NULL);
*open_flags |= O_BINARY;
*open_flags &= ~O_ACCMODE;
- if (bdrv_flags & BDRV_O_RDWR) {
+
+ if (bdrv_flags & BDRV_O_AUTO_RDONLY) {
+ read_write = has_writers;
+ } else if (bdrv_flags & BDRV_O_RDWR) {
+ read_write = true;
+ }
+
+ if (read_write) {
*open_flags |= O_RDWR;
} else {
*open_flags |= O_RDONLY;
@@ -431,6 +442,8 @@ static QemuOptsList raw_runtime_opts = {
},
};
+static const char *const mutable_opts[] = { "x-check-cache-dropped", NULL };
+
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags,
bool device, Error **errp)
@@ -515,24 +528,12 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
false);
s->open_flags = open_flags;
- raw_parse_flags(bdrv_flags, &s->open_flags);
+ raw_parse_flags(bdrv_flags, &s->open_flags, false);
s->fd = -1;
fd = qemu_open(filename, s->open_flags, 0644);
ret = fd < 0 ? -errno : 0;
- if (ret == -EACCES || ret == -EROFS) {
- /* Try to degrade to read-only, but if it doesn't work, still use the
- * normal error message. */
- if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
- bdrv_flags &= ~BDRV_O_RDWR;
- raw_parse_flags(bdrv_flags, &s->open_flags);
- assert(!(s->open_flags & O_CREAT));
- fd = qemu_open(filename, s->open_flags);
- ret = fd < 0 ? -errno : 0;
- }
- }
-
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not open '%s'", filename);
if (ret == -EROFS) {
@@ -842,13 +843,77 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
return ret;
}
+static int raw_reconfigure_getfd(BlockDriverState *bs, int flags,
+ int *open_flags, uint64_t perm, bool force_dup,
+ Error **errp)
+{
+ BDRVRawState *s = bs->opaque;
+ int fd = -1;
+ int ret;
+ bool has_writers = perm &
+ (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_RESIZE);
+ int fcntl_flags = O_APPEND | O_NONBLOCK;
+#ifdef O_NOATIME
+ fcntl_flags |= O_NOATIME;
+#endif
+
+ *open_flags = 0;
+ if (s->type == FTYPE_CD) {
+ *open_flags |= O_NONBLOCK;
+ }
+
+ raw_parse_flags(flags, open_flags, has_writers);
+
+#ifdef O_ASYNC
+ /* Not all operating systems have O_ASYNC, and those that don't
+ * will not let us track the state into rs->open_flags (typically
+ * you achieve the same effect with an ioctl, for example I_SETSIG
+ * on Solaris). But we do not use O_ASYNC, so that's fine.
+ */
+ assert((s->open_flags & O_ASYNC) == 0);
+#endif
+
+ if (!force_dup && *open_flags == s->open_flags) {
+ /* We're lucky, the existing fd is fine */
+ return s->fd;
+ }
+
+ if ((*open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+ /* dup the original fd */
+ fd = qemu_dup(s->fd);
+ if (fd >= 0) {
+ ret = fcntl_setfl(fd, *open_flags);
+ if (ret) {
+ qemu_close(fd);
+ fd = -1;
+ }
+ }
+ }
+
+ /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+ if (fd == -1) {
+ const char *normalized_filename = bs->filename;
+ ret = raw_normalize_devicepath(&normalized_filename, errp);
+ if (ret >= 0) {
+ assert(!(*open_flags & O_CREAT));
+ fd = qemu_open(normalized_filename, *open_flags);
+ if (fd == -1) {
+ error_setg_errno(errp, errno, "Could not reopen file");
+ return -1;
+ }
+ }
+ }
+
+ return fd;
+}
+
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
BDRVRawState *s;
BDRVRawReopenState *rs;
QemuOpts *opts;
- int ret = 0;
+ int ret;
Error *local_err = NULL;
assert(state != NULL);
@@ -858,7 +923,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
state->opaque = g_new0(BDRVRawReopenState, 1);
rs = state->opaque;
- rs->fd = -1;
/* Handle options changes */
opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
@@ -877,50 +941,12 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* bdrv_reopen_prepare() will detect changes and complain. */
qemu_opts_to_qdict(opts, state->options);
- if (s->type == FTYPE_CD) {
- rs->open_flags |= O_NONBLOCK;
- }
-
- raw_parse_flags(state->flags, &rs->open_flags);
-
- int fcntl_flags = O_APPEND | O_NONBLOCK;
-#ifdef O_NOATIME
- fcntl_flags |= O_NOATIME;
-#endif
-
-#ifdef O_ASYNC
- /* Not all operating systems have O_ASYNC, and those that don't
- * will not let us track the state into rs->open_flags (typically
- * you achieve the same effect with an ioctl, for example I_SETSIG
- * on Solaris). But we do not use O_ASYNC, so that's fine.
- */
- assert((s->open_flags & O_ASYNC) == 0);
-#endif
-
- if ((rs->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
- /* dup the original fd */
- rs->fd = qemu_dup(s->fd);
- if (rs->fd >= 0) {
- ret = fcntl_setfl(rs->fd, rs->open_flags);
- if (ret) {
- qemu_close(rs->fd);
- rs->fd = -1;
- }
- }
- }
-
- /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
- if (rs->fd == -1) {
- const char *normalized_filename = state->bs->filename;
- ret = raw_normalize_devicepath(&normalized_filename, errp);
- if (ret >= 0) {
- assert(!(rs->open_flags & O_CREAT));
- rs->fd = qemu_open(normalized_filename, rs->open_flags);
- if (rs->fd == -1) {
- error_setg_errno(errp, errno, "Could not reopen file");
- ret = -1;
- }
- }
+ rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
+ state->perm, true, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -1;
+ goto out;
}
/* Fail already reopen_prepare() if we can't get a working O_DIRECT
@@ -928,13 +954,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,
if (rs->fd != -1) {
raw_probe_alignment(state->bs, rs->fd, &local_err);
if (local_err) {
- qemu_close(rs->fd);
- rs->fd = -1;
error_propagate(errp, local_err);
ret = -EINVAL;
+ goto out_fd;
}
}
+ s->reopen_state = state;
+ ret = 0;
+out_fd:
+ if (ret < 0) {
+ qemu_close(rs->fd);
+ rs->fd = -1;
+ }
out:
qemu_opts_del(opts);
return ret;
@@ -944,29 +976,25 @@ static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *rs = state->opaque;
BDRVRawState *s = state->bs->opaque;
- Error *local_err = NULL;
s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
- /* Copy locks to the new fd before closing the old one. */
- raw_apply_lock_bytes(NULL, rs->fd, s->locked_perm,
- s->locked_shared_perm, false, &local_err);
- if (local_err) {
- /* shouldn't fail in a sane host, but report it just in case. */
- error_report_err(local_err);
- }
qemu_close(s->fd);
s->fd = rs->fd;
g_free(state->opaque);
state->opaque = NULL;
+
+ assert(s->reopen_state == state);
+ s->reopen_state = NULL;
}
static void raw_reopen_abort(BDRVReopenState *state)
{
BDRVRawReopenState *rs = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
/* nothing to do if NULL, we didn't get far enough */
if (rs == NULL) {
@@ -979,6 +1007,9 @@ static void raw_reopen_abort(BDRVReopenState *state)
}
g_free(state->opaque);
state->opaque = NULL;
+
+ assert(s->reopen_state == state);
+ s->reopen_state = NULL;
}
static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
@@ -2664,12 +2695,78 @@ static QemuOptsList raw_create_opts = {
static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
Error **errp)
{
- return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+ BDRVRawState *s = bs->opaque;
+ BDRVRawReopenState *rs = NULL;
+ int open_flags;
+ int ret;
+
+ if (s->perm_change_fd) {
+ /*
+ * In the context of reopen, this function may be called several times
+ * (directly and recursively while changing permissions of the parent).
+ * This is even true for children that don't inherit from the original
+ * reopen node, so s->reopen_state is not set.
+ *
+ * Ignore all but the first call.
+ */
+ return 0;
+ }
+
+ if (s->reopen_state) {
+ /* We already have a new file descriptor to set permissions for */
+ assert(s->reopen_state->perm == perm);
+ assert(s->reopen_state->shared_perm == shared);
+ rs = s->reopen_state->opaque;
+ s->perm_change_fd = rs->fd;
+ } else {
+ /* We may need a new fd if auto-read-only switches the mode */
+ ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
+ false, errp);
+ if (ret < 0) {
+ return ret;
+ } else if (ret != s->fd) {
+ s->perm_change_fd = ret;
+ }
+ }
+
+ /* Prepare permissions on old fd to avoid conflicts between old and new,
+ * but keep everything locked that new will need. */
+ ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Copy locks to the new fd */
+ if (s->perm_change_fd) {
+ ret = raw_apply_lock_bytes(NULL, s->perm_change_fd, perm, ~shared,
+ false, errp);
+ if (ret < 0) {
+ raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ if (s->perm_change_fd && !s->reopen_state) {
+ qemu_close(s->perm_change_fd);
+ }
+ s->perm_change_fd = 0;
+ return ret;
}
static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
{
BDRVRawState *s = bs->opaque;
+
+ /* For reopen, we have already switched to the new fd (.bdrv_set_perm is
+ * called after .bdrv_reopen_commit) */
+ if (s->perm_change_fd && s->fd != s->perm_change_fd) {
+ qemu_close(s->fd);
+ s->fd = s->perm_change_fd;
+ }
+ s->perm_change_fd = 0;
+
raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
s->perm = perm;
s->shared_perm = shared;
@@ -2677,6 +2774,15 @@ static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
static void raw_abort_perm_update(BlockDriverState *bs)
{
+ BDRVRawState *s = bs->opaque;
+
+ /* For reopen, .bdrv_reopen_abort is called afterwards and will close
+ * the file descriptor. */
+ if (s->perm_change_fd && !s->reopen_state) {
+ qemu_close(s->perm_change_fd);
+ }
+ s->perm_change_fd = 0;
+
raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
}
@@ -2766,6 +2872,7 @@ BlockDriver bdrv_file = {
.bdrv_set_perm = raw_set_perm,
.bdrv_abort_perm_update = raw_abort_perm_update,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
};
/***********************************************/
@@ -3217,6 +3324,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
@@ -3343,6 +3451,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
@@ -3476,6 +3585,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
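raw_reconfigure_getfd() above reuses the existing descriptor when only fcntl(F_SETFL)-changeable flags differ, and falls back to opening by name when the access mode itself must change. A minimal POSIX sketch of that decision follows; it is not the QEMU function, and it ignores extra bits (such as O_LARGEFILE on Linux) that F_GETFL may report:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Return a descriptor carrying @wanted, reusing @fd when possible. */
static int reconfigure_fd(int fd, const char *path, int wanted)
{
    const int setfl_mask = O_APPEND | O_NONBLOCK;   /* F_SETFL-changeable */
    int cur = fcntl(fd, F_GETFL);

    if (cur == wanted) {
        return fd;                                  /* nothing to change */
    }
    if ((cur & ~setfl_mask) == (wanted & ~setfl_mask)) {
        /* Only status flags differ: dup and adjust. The dup shares the
         * open file description, so the F_SETFL change is visible via
         * both descriptors; the caller is expected to close the old fd
         * soon after, as raw_reopen_commit() does. */
        int nfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
        if (nfd >= 0 && fcntl(nfd, F_SETFL, wanted) == 0) {
            return nfd;
        }
        if (nfd >= 0) {
            close(nfd);
        }
    }
    /* Access mode (O_RDONLY vs O_RDWR) changed: must reopen by name. */
    return open(path, wanted);
}

int main(void)
{
    const char *path = "/tmp/reopen-demo.img";
    int fd = open(path, O_RDONLY | O_CREAT, 0644);
    int rw = reconfigure_fd(fd, path, O_RDWR);

    printf("old=%d new=%d\n", fd, rw);
    if (rw != fd && fd >= 0) {
        close(fd);                                  /* reopen-commit path */
    }
    if (rw >= 0) {
        close(rw);
    }
    return 0;
}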
diff --git a/block/gluster.c b/block/gluster.c
index af64330211..51f184cbd8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -20,6 +20,10 @@
#include "qemu/option.h"
#include "qemu/cutils.h"
+#ifdef CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT
+# define glfs_ftruncate(fd, offset) glfs_ftruncate(fd, offset, NULL, NULL)
+#endif
+
#define GLUSTER_OPT_FILENAME "filename"
#define GLUSTER_OPT_VOLUME "volume"
#define GLUSTER_OPT_PATH "path"
@@ -725,7 +729,11 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
/*
* AIO callback routine called from GlusterFS thread.
*/
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret,
+#ifdef CONFIG_GLUSTERFS_IOCB_HAS_STAT
+ struct glfs_stat *pre, struct glfs_stat *post,
+#endif
+ void *arg)
{
GlusterAIOCB *acb = (GlusterAIOCB *)arg;
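Both gluster hunks above absorb an API break in newer glusterfs releases, where glfs_ftruncate() and the AIO callback grew pre/post glfs_stat parameters. The macro shim keeps the driver's call sites on the old arity; here is a self-contained illustration of the technique (library and macro names invented):

#include <stdio.h>

/* Stand-in for the newer library's 4-argument signature: */
static int lib_ftruncate4(int fd, long offset, void *prestat, void *poststat)
{
    (void)prestat;
    (void)poststat;
    printf("truncate fd=%d offset=%ld\n", fd, offset);
    return 0;
}

/* What a configure probe would define on detecting the new API: */
#define HAVE_FTRUNCATE_HAS_STAT 1

#ifdef HAVE_FTRUNCATE_HAS_STAT
/* Rewrite the driver's 2-argument calls to pass NULL for the stats: */
# define lib_ftruncate(fd, offset) lib_ftruncate4(fd, offset, NULL, NULL)
#endif

int main(void)
{
    /* Driver code keeps using the old 2-argument form everywhere: */
    return lib_ftruncate(3, 4096);
}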
diff --git a/block/mirror.c b/block/mirror.c
index 726d3c27fb..010fdafd79 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -630,6 +630,10 @@ static int mirror_exit_common(Job *job)
}
s->prepared = true;
+ if (bdrv_chain_contains(src, target_bs)) {
+ bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
+ }
+
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
/* Make sure that the source BDS doesn't go away during bdrv_replace_node,
@@ -1639,6 +1643,10 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
goto fail;
}
}
+
+ if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
+ goto fail;
+ }
}
QTAILQ_INIT(&s->ops_in_flight);
diff --git a/block/qapi.c b/block/qapi.c
index 6002a768f8..21edab34fc 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -493,14 +493,14 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
}
bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_READ],
- &ds->has_x_rd_latency_histogram,
- &ds->x_rd_latency_histogram);
+ &ds->has_rd_latency_histogram,
+ &ds->rd_latency_histogram);
bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_WRITE],
- &ds->has_x_wr_latency_histogram,
- &ds->x_wr_latency_histogram);
+ &ds->has_wr_latency_histogram,
+ &ds->wr_latency_histogram);
bdrv_latency_histogram_stats(&stats->latency_histogram[BLOCK_ACCT_FLUSH],
- &ds->has_x_flush_latency_histogram,
- &ds->x_flush_latency_histogram);
+ &ds->has_flush_latency_histogram,
+ &ds->flush_latency_histogram);
}
static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 9d968bdcda..e53a1609d7 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -343,11 +343,17 @@ static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
uint32_t granularity;
BdrvDirtyBitmap *bitmap = NULL;
- if (bm->flags & BME_FLAG_IN_USE) {
- error_setg(errp, "Bitmap '%s' is in use", bm->name);
+ granularity = 1U << bm->granularity_bits;
+ bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp);
+ if (bitmap == NULL) {
goto fail;
}
+ if (bm->flags & BME_FLAG_IN_USE) {
+ /* Data is unusable, skip loading it */
+ return bitmap;
+ }
+
ret = bitmap_table_load(bs, &bm->table, &bitmap_table);
if (ret < 0) {
error_setg_errno(errp, -ret,
@@ -356,12 +362,6 @@ static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
goto fail;
}
- granularity = 1U << bm->granularity_bits;
- bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp);
- if (bitmap == NULL) {
- goto fail;
- }
-
ret = load_bitmap_data(bs, bitmap_table, bm->table.size, bitmap);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read bitmap '%s' from image",
@@ -462,10 +462,25 @@ static int check_dir_entry(BlockDriverState *bs, Qcow2BitmapDirEntry *entry)
return len;
}
- fail = (phys_bitmap_bytes > BME_MAX_PHYS_SIZE) ||
- (len > ((phys_bitmap_bytes * 8) << entry->granularity_bits));
+ if (phys_bitmap_bytes > BME_MAX_PHYS_SIZE) {
+ return -EINVAL;
+ }
+
+ if (!(entry->flags & BME_FLAG_IN_USE) &&
+ (len > ((phys_bitmap_bytes * 8) << entry->granularity_bits)))
+ {
+ /*
+ * We've loaded a valid bitmap (IN_USE not set) or we are going to
+ * store a valid bitmap, but the allocated bitmap table size is not
+ * enough to store this bitmap.
+ *
+ * Note that it's OK to have an invalid bitmap with invalid size due
+ * to a bitmap that was not correctly saved after image resize.
+ */
+ return -EINVAL;
+ }
- return fail ? -EINVAL : 0;
+ return 0;
}
static inline void bitmap_directory_to_be(uint8_t *dir, size_t size)
@@ -950,6 +965,7 @@ bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
Qcow2Bitmap *bm;
GSList *created_dirty_bitmaps = NULL;
bool header_updated = false;
+ bool needs_update = false;
if (s->nb_bitmaps == 0) {
/* No bitmaps - nothing to do */
@@ -963,35 +979,39 @@ bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
}
QSIMPLEQ_FOREACH(bm, bm_list, entry) {
- if (!(bm->flags & BME_FLAG_IN_USE)) {
- BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp);
- if (bitmap == NULL) {
- goto fail;
- }
+ BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp);
+ if (bitmap == NULL) {
+ goto fail;
+ }
- if (!(bm->flags & BME_FLAG_AUTO)) {
- bdrv_disable_dirty_bitmap(bitmap);
- }
- bdrv_dirty_bitmap_set_persistance(bitmap, true);
+ bdrv_dirty_bitmap_set_persistence(bitmap, true);
+ if (bm->flags & BME_FLAG_IN_USE) {
+ bdrv_dirty_bitmap_set_inconsistent(bitmap);
+ } else {
+ /* NB: updated flags only get written if can_write(bs) is true. */
bm->flags |= BME_FLAG_IN_USE;
- created_dirty_bitmaps =
- g_slist_append(created_dirty_bitmaps, bitmap);
+ needs_update = true;
+ }
+ if (!(bm->flags & BME_FLAG_AUTO)) {
+ bdrv_disable_dirty_bitmap(bitmap);
}
+ created_dirty_bitmaps =
+ g_slist_append(created_dirty_bitmaps, bitmap);
}
- if (created_dirty_bitmaps != NULL) {
- if (can_write(bs)) {
- /* in_use flags must be updated */
- int ret = update_ext_header_and_dir_in_place(bs, bm_list);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Can't update bitmap directory");
- goto fail;
- }
- header_updated = true;
- } else {
- g_slist_foreach(created_dirty_bitmaps, set_readonly_helper,
- (gpointer)true);
+ if (needs_update && can_write(bs)) {
+ /* in_use flags must be updated */
+ int ret = update_ext_header_and_dir_in_place(bs, bm_list);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Can't update bitmap directory");
+ goto fail;
}
+ header_updated = true;
+ }
+
+ if (!can_write(bs)) {
+ g_slist_foreach(created_dirty_bitmaps, set_readonly_helper,
+ (gpointer)true);
}
g_slist_free(created_dirty_bitmaps);
@@ -1113,23 +1133,21 @@ int qcow2_reopen_bitmaps_rw_hint(BlockDriverState *bs, bool *header_updated,
}
QSIMPLEQ_FOREACH(bm, bm_list, entry) {
- if (!(bm->flags & BME_FLAG_IN_USE)) {
- BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name);
- if (bitmap == NULL) {
- continue;
- }
-
- if (!bdrv_dirty_bitmap_readonly(bitmap)) {
- error_setg(errp, "Bitmap %s is not readonly but not marked"
- "'IN_USE' in the image. Something went wrong,"
- "all the bitmaps may be corrupted", bm->name);
- ret = -EINVAL;
- goto out;
- }
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name);
+ if (bitmap == NULL) {
+ continue;
+ }
- bm->flags |= BME_FLAG_IN_USE;
- ro_dirty_bitmaps = g_slist_append(ro_dirty_bitmaps, bitmap);
+ if (!bdrv_dirty_bitmap_readonly(bitmap)) {
+ error_setg(errp, "Bitmap %s was loaded prior to rw-reopen, but was "
+ "not marked as readonly. This is a bug, something went "
+ "wrong. All of the bitmaps may be corrupted", bm->name);
+ ret = -EINVAL;
+ goto out;
}
+
+ bm->flags |= BME_FLAG_IN_USE;
+ ro_dirty_bitmaps = g_slist_append(ro_dirty_bitmaps, bitmap);
}
if (ro_dirty_bitmaps != NULL) {
@@ -1157,6 +1175,52 @@ int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp)
return qcow2_reopen_bitmaps_rw_hint(bs, NULL, errp);
}
+/* Check whether it's safe to resize bitmaps */
+int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ Qcow2BitmapList *bm_list;
+ Qcow2Bitmap *bm;
+ int ret = 0;
+
+ if (s->nb_bitmaps == 0) {
+ return 0;
+ }
+
+ bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+ s->bitmap_directory_size, errp);
+ if (bm_list == NULL) {
+ return -EINVAL;
+ }
+
+ QSIMPLEQ_FOREACH(bm, bm_list, entry) {
+ BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name);
+ if (bitmap == NULL) {
+ /*
+ * We rely on all bitmaps being in memory to be able to resize them;
+ * otherwise, we'd need to resize them on disk explicitly.
+ */
+ error_setg(errp, "Cannot resize qcow2 with persistent bitmaps that "
+ "were not loaded into memory");
+ ret = -ENOTSUP;
+ goto out;
+ }
+
+ /*
+ * The checks against readonly and busy are redundant, but certainly
+ * do no harm. The check against inconsistent bitmaps, however, is crucial:
+ */
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
+ ret = -ENOTSUP;
+ goto out;
+ }
+ }
+
+out:
+ bitmap_list_free(bm_list);
+ return ret;
+}
+
/* store_bitmap_data()
* Store bitmap to image, filling bitmap table accordingly.
*/
@@ -1424,9 +1488,9 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp)
uint32_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
Qcow2Bitmap *bm;
- if (!bdrv_dirty_bitmap_get_persistance(bitmap) ||
- bdrv_dirty_bitmap_readonly(bitmap))
- {
+ if (!bdrv_dirty_bitmap_get_persistence(bitmap) ||
+ bdrv_dirty_bitmap_readonly(bitmap) ||
+ bdrv_dirty_bitmap_inconsistent(bitmap)) {
continue;
}
@@ -1542,7 +1606,7 @@ int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp)
for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap != NULL;
bitmap = bdrv_dirty_bitmap_next(bs, bitmap))
{
- if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+ if (bdrv_dirty_bitmap_get_persistence(bitmap)) {
bdrv_dirty_bitmap_set_readonly(bitmap, true);
}
}
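
As background for the size check in check_dir_entry() above: a serialized bitmap of phys_bitmap_bytes bytes describes at most (phys_bitmap_bytes * 8) << granularity_bits bytes of virtual disk, since each bit covers one granularity-sized chunk. A standalone sketch of that bound (illustrative, not QEMU code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Maximum virtual-disk size (in bytes) that a serialized bitmap of
 * phys_bitmap_bytes can cover: each of its 8 * phys_bitmap_bytes bits
 * describes one chunk of (1 << granularity_bits) bytes.
 */
static uint64_t bitmap_coverage(uint64_t phys_bitmap_bytes,
                                unsigned granularity_bits)
{
    return (phys_bitmap_bytes * 8) << granularity_bits;
}

int main(void)
{
    /* A 1 MiB bitmap at 64 KiB granularity covers 512 GiB */
    printf("%" PRIu64 "\n", bitmap_coverage(1ULL << 20, 16));
    return 0;
}

The new check only applies this bound to valid (not IN_USE) bitmaps, since a bitmap left in_use after an interrupted resize may legitimately have a stale size.
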
diff --git a/block/qcow2.c b/block/qcow2.c
index c4dd876fb4..0dd77c6367 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -627,6 +627,30 @@ int qcow2_validate_table(BlockDriverState *bs, uint64_t offset,
return 0;
}
+static const char *const mutable_opts[] = {
+ QCOW2_OPT_LAZY_REFCOUNTS,
+ QCOW2_OPT_DISCARD_REQUEST,
+ QCOW2_OPT_DISCARD_SNAPSHOT,
+ QCOW2_OPT_DISCARD_OTHER,
+ QCOW2_OPT_OVERLAP,
+ QCOW2_OPT_OVERLAP_TEMPLATE,
+ QCOW2_OPT_OVERLAP_MAIN_HEADER,
+ QCOW2_OPT_OVERLAP_ACTIVE_L1,
+ QCOW2_OPT_OVERLAP_ACTIVE_L2,
+ QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
+ QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
+ QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
+ QCOW2_OPT_OVERLAP_INACTIVE_L1,
+ QCOW2_OPT_OVERLAP_INACTIVE_L2,
+ QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
+ QCOW2_OPT_CACHE_SIZE,
+ QCOW2_OPT_L2_CACHE_SIZE,
+ QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
+ QCOW2_OPT_REFCOUNT_CACHE_SIZE,
+ QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ NULL
+};
+
static QemuOptsList qcow2_runtime_opts = {
.name = "qcow2",
.head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
@@ -3646,9 +3670,7 @@ static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
}
/* cannot proceed if image has bitmaps */
- if (s->nb_bitmaps) {
- /* TODO: resize bitmaps in the image */
- error_setg(errp, "Can't resize an image which has bitmaps");
+ if (qcow2_truncate_bitmaps_check(bs, errp)) {
ret = -ENOTSUP;
goto fail;
}
@@ -5275,6 +5297,7 @@ BlockDriver bdrv_qcow2 = {
.create_opts = &qcow2_create_opts,
.strong_runtime_opts = qcow2_strong_runtime_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_check = qcow2_co_check,
.bdrv_amend_options = qcow2_amend_options,
diff --git a/block/qcow2.h b/block/qcow2.h
index de2a3bdfc5..fdee297f33 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -723,6 +723,7 @@ Qcow2BitmapInfoList *qcow2_get_bitmap_info_list(BlockDriverState *bs,
int qcow2_reopen_bitmaps_rw_hint(BlockDriverState *bs, bool *header_updated,
Error **errp);
int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
+int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp);
void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
diff --git a/block/raw-format.c b/block/raw-format.c
index e3e5ba2c8a..cec29986cc 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -37,6 +37,8 @@ typedef struct BDRVRawState {
bool has_size;
} BDRVRawState;
+static const char *const mutable_opts[] = { "offset", "size", NULL };
+
static QemuOptsList raw_runtime_opts = {
.name = "raw",
.head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
@@ -570,6 +572,7 @@ BlockDriver bdrv_raw = {
.create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init,
.strong_runtime_opts = raw_strong_runtime_opts,
+ .mutable_opts = mutable_opts,
};
static void bdrv_raw_init(void)
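
For context, mutable_opts names the runtime options a driver allows to change across a reopen; anything not on the NULL-terminated list must keep its current value. A minimal sketch of the lookup such a reopen check could perform (the helper name here is hypothetical, not the actual QEMU function):

#include <stdbool.h>
#include <string.h>

static const char *const raw_mutable_opts[] = { "offset", "size", NULL };

/* Return true if @name appears in the driver's NULL-terminated list */
static bool opt_is_mutable(const char *const *mutable_opts, const char *name)
{
    for (const char *const *p = mutable_opts; p && *p; p++) {
        if (!strcmp(*p, name)) {
            return true;  /* e.g. "offset" for the raw driver */
        }
    }
    return false;
}
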
diff --git a/block/replication.c b/block/replication.c
index 4c80b54daf..b95bd28802 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -374,19 +374,18 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs,
- opts);
+ opts, true);
}
if (s->orig_secondary_read_only) {
QDict *opts = qdict_new();
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs,
- opts);
+ opts, true);
}
if (reopen_queue) {
- bdrv_reopen_multiple(bdrv_get_aio_context(bs),
- reopen_queue, &local_err);
+ bdrv_reopen_multiple(reopen_queue, &local_err);
error_propagate(errp, local_err);
}
diff --git a/block/stream.c b/block/stream.c
index e14579ff80..6253c86fae 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -35,6 +35,7 @@ typedef struct StreamBlockJob {
BlockdevOnError on_error;
char *backing_file_str;
bool bs_read_only;
+ bool chain_frozen;
} StreamBlockJob;
static int coroutine_fn stream_populate(BlockBackend *blk,
@@ -49,6 +50,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
}
+static void stream_abort(Job *job)
+{
+ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
+
+ if (s->chain_frozen) {
+ BlockJob *bjob = &s->common;
+ bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base);
+ }
+}
+
static int stream_prepare(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
@@ -58,6 +69,9 @@ static int stream_prepare(Job *job)
Error *local_err = NULL;
int ret = 0;
+ bdrv_unfreeze_backing_chain(bs, base);
+ s->chain_frozen = false;
+
if (bs->backing) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
@@ -208,6 +222,7 @@ static const BlockJobDriver stream_job_driver = {
.free = block_job_free,
.run = stream_run,
.prepare = stream_prepare,
+ .abort = stream_abort,
.clean = stream_clean,
.user_resume = block_job_user_resume,
.drain = block_job_drain,
@@ -254,9 +269,15 @@ void stream_start(const char *job_id, BlockDriverState *bs,
&error_abort);
}
+ if (bdrv_freeze_backing_chain(bs, base, errp) < 0) {
+ job_early_fail(&s->common.job);
+ goto fail;
+ }
+
s->base = base;
s->backing_file_str = g_strdup(backing_file_str);
s->bs_read_only = bs_read_only;
+ s->chain_frozen = true;
s->on_error = on_error;
trace_stream_start(bs, base, s);
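
The point of the new chain_frozen flag is that the backing chain must be unfrozen exactly once, whether the job completes (.prepare) or fails (.abort). A reduced sketch of that lifecycle, with the block layer calls stubbed out for illustration:

#include <stdbool.h>

typedef struct Sketch {
    bool chain_frozen;
} Sketch;

static void sketch_start(Sketch *s)
{
    /* the freeze succeeded, so abort must be able to undo it */
    s->chain_frozen = true;
}

static void sketch_prepare(Sketch *s)
{
    /* success path: unfreeze before modifying the chain */
    s->chain_frozen = false;
}

static void sketch_abort(Sketch *s)
{
    /* unfreeze only if prepare never ran; never unfreeze twice */
    if (s->chain_frozen) {
        s->chain_frozen = false;
    }
}
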
diff --git a/blockdev.c b/blockdev.c
index 871966ca13..53df2eb875 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1257,7 +1257,6 @@ out_aio_context:
* @node: The name of the BDS node to search for bitmaps
* @name: The name of the bitmap to search for
* @pbs: Output pointer for BDS lookup, if desired. Can be NULL.
- * @paio: Output pointer for aio_context acquisition, if desired. Can be NULL.
* @errp: Output pointer for error information. Can be NULL.
*
* @return: A bitmap object on success, or NULL on failure.
@@ -2011,11 +2010,7 @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
return;
}
- if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
- error_setg(errp, "Cannot modify a bitmap in use by another operation");
- return;
- } else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
- error_setg(errp, "Cannot clear a readonly bitmap");
+ if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_DEFAULT, errp)) {
return;
}
@@ -2060,10 +2055,7 @@ static void block_dirty_bitmap_enable_prepare(BlkActionState *common,
return;
}
- if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be enabled", action->name);
+ if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -2101,10 +2093,7 @@ static void block_dirty_bitmap_disable_prepare(BlkActionState *common,
return;
}
- if (bdrv_dirty_bitmap_user_locked(state->bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be disabled", action->name);
+ if (bdrv_dirty_bitmap_check(state->bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -2875,7 +2864,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
bdrv_disable_dirty_bitmap(bitmap);
}
- bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+ bdrv_dirty_bitmap_set_persistence(bitmap, persistent);
out:
if (aio_context) {
aio_context_release(aio_context);
@@ -2895,14 +2884,12 @@ void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
return;
}
- if (bdrv_dirty_bitmap_user_locked(bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation and"
- " cannot be removed", name);
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO,
+ errp)) {
return;
}
- if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+ if (bdrv_dirty_bitmap_get_persistence(bitmap)) {
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
@@ -2934,13 +2921,7 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
return;
}
- if (bdrv_dirty_bitmap_user_locked(bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be cleared", name);
- return;
- } else if (bdrv_dirty_bitmap_readonly(bitmap)) {
- error_setg(errp, "Bitmap '%s' is readonly and cannot be cleared", name);
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
return;
}
@@ -2958,10 +2939,7 @@ void qmp_block_dirty_bitmap_enable(const char *node, const char *name,
return;
}
- if (bdrv_dirty_bitmap_user_locked(bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be enabled", name);
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -2979,10 +2957,7 @@ void qmp_block_dirty_bitmap_disable(const char *node, const char *name,
return;
}
- if (bdrv_dirty_bitmap_user_locked(bitmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be disabled", name);
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_ALLOW_RO, errp)) {
return;
}
@@ -3561,10 +3536,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn,
bdrv_unref(target_bs);
goto out;
}
- if (bdrv_dirty_bitmap_user_locked(bmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be used for backup", backup->bitmap);
+ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) {
goto out;
}
}
@@ -3674,10 +3646,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn,
error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
goto out;
}
- if (bdrv_dirty_bitmap_user_locked(bmap)) {
- error_setg(errp,
- "Bitmap '%s' is currently in use by another operation"
- " and cannot be used for backup", backup->bitmap);
+ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) {
goto out;
}
}
@@ -4287,6 +4256,53 @@ fail:
visit_free(v);
}
+void qmp_x_blockdev_reopen(BlockdevOptions *options, Error **errp)
+{
+ BlockDriverState *bs;
+ AioContext *ctx;
+ QObject *obj;
+ Visitor *v = qobject_output_visitor_new(&obj);
+ Error *local_err = NULL;
+ BlockReopenQueue *queue;
+ QDict *qdict;
+
+ /* Check for the selected node name */
+ if (!options->has_node_name) {
+ error_setg(errp, "Node name not specified");
+ goto fail;
+ }
+
+ bs = bdrv_find_node(options->node_name);
+ if (!bs) {
+ error_setg(errp, "Cannot find node named '%s'", options->node_name);
+ goto fail;
+ }
+
+ /* Put all options in a QDict and flatten it */
+ visit_type_BlockdevOptions(v, NULL, &options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ visit_complete(v, &obj);
+ qdict = qobject_to(QDict, obj);
+
+ qdict_flatten(qdict);
+
+ /* Perform the reopen operation */
+ ctx = bdrv_get_aio_context(bs);
+ aio_context_acquire(ctx);
+ bdrv_subtree_drained_begin(bs);
+ queue = bdrv_reopen_queue(NULL, bs, qdict, false);
+ bdrv_reopen_multiple(queue, errp);
+ bdrv_subtree_drained_end(bs);
+ aio_context_release(ctx);
+
+fail:
+ visit_free(v);
+}
+
void qmp_blockdev_del(const char *node_name, Error **errp)
{
AioContext *aio_context;
@@ -4452,22 +4468,22 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
aio_context_release(old_context);
}
-void qmp_x_block_latency_histogram_set(
- const char *device,
+void qmp_block_latency_histogram_set(
+ const char *id,
bool has_boundaries, uint64List *boundaries,
bool has_boundaries_read, uint64List *boundaries_read,
bool has_boundaries_write, uint64List *boundaries_write,
bool has_boundaries_flush, uint64List *boundaries_flush,
Error **errp)
{
- BlockBackend *blk = blk_by_name(device);
+ BlockBackend *blk = qmp_get_blk(NULL, id, errp);
BlockAcctStats *stats;
int ret;
if (!blk) {
- error_setg(errp, "Device '%s' not found", device);
return;
}
+
stats = blk_get_stats(blk);
if (!has_boundaries && !has_boundaries_read && !has_boundaries_write &&
@@ -4482,7 +4498,7 @@ void qmp_x_block_latency_histogram_set(
stats, BLOCK_ACCT_READ,
has_boundaries_read ? boundaries_read : boundaries);
if (ret) {
- error_setg(errp, "Device '%s' set read boundaries fail", device);
+ error_setg(errp, "Device '%s' set read boundaries fail", id);
return;
}
}
@@ -4492,7 +4508,7 @@ void qmp_x_block_latency_histogram_set(
stats, BLOCK_ACCT_WRITE,
has_boundaries_write ? boundaries_write : boundaries);
if (ret) {
- error_setg(errp, "Device '%s' set write boundaries fail", device);
+ error_setg(errp, "Device '%s' set write boundaries fail", id);
return;
}
}
@@ -4502,7 +4518,7 @@ void qmp_x_block_latency_histogram_set(
stats, BLOCK_ACCT_FLUSH,
has_boundaries_flush ? boundaries_flush : boundaries);
if (ret) {
- error_setg(errp, "Device '%s' set flush boundaries fail", device);
+ error_setg(errp, "Device '%s' set flush boundaries fail", id);
return;
}
}
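
The repeated open-coded "user locked" and "readonly" checks in blockdev.c are consolidated above into bdrv_dirty_bitmap_check() with a flags argument. A hypothetical sketch of that pattern; the flag names mirror the patch, but the values and the implementation here are illustrative only:

#include <stdbool.h>

enum {
    BDRV_BITMAP_BUSY         = 1 << 0,  /* reject bitmaps in use by a job */
    BDRV_BITMAP_RO           = 1 << 1,  /* reject readonly bitmaps */
    BDRV_BITMAP_INCONSISTENT = 1 << 2,  /* reject inconsistent bitmaps */
};
#define BDRV_BITMAP_DEFAULT  (BDRV_BITMAP_BUSY | BDRV_BITMAP_RO | \
                              BDRV_BITMAP_INCONSISTENT)
#define BDRV_BITMAP_ALLOW_RO (BDRV_BITMAP_BUSY | BDRV_BITMAP_INCONSISTENT)

typedef struct Bitmap {
    bool busy, readonly, inconsistent;
} Bitmap;

/* Return 0 if the bitmap passes every check requested in @flags */
static int bitmap_check(const Bitmap *bm, unsigned flags)
{
    if ((flags & BDRV_BITMAP_BUSY) && bm->busy) {
        return -1;
    }
    if ((flags & BDRV_BITMAP_RO) && bm->readonly) {
        return -1;
    }
    if ((flags & BDRV_BITMAP_INCONSISTENT) && bm->inconsistent) {
        return -1;
    }
    return 0;
}

Operations that merely toggle the enabled state (enable/disable) pass ALLOW_RO, while operations that modify bitmap data (clear, backup, remove) use the stricter DEFAULT set, as the call sites above show.
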
diff --git a/configure b/configure
index 355eebfc5d..7071f52584 100755
--- a/configure
+++ b/configure
@@ -456,6 +456,8 @@ glusterfs_xlator_opt="no"
glusterfs_discard="no"
glusterfs_fallocate="no"
glusterfs_zerofill="no"
+glusterfs_ftruncate_has_stat="no"
+glusterfs_iocb_has_stat="no"
gtk=""
gtk_gl="no"
tls_priority="NORMAL"
@@ -1220,6 +1222,10 @@ for opt do
;;
--enable-curses) curses="yes"
;;
+ --disable-iconv) iconv="no"
+ ;;
+ --enable-iconv) iconv="yes"
+ ;;
--disable-curl) curl="no"
;;
--enable-curl) curl="yes"
@@ -1709,6 +1715,7 @@ disabled with --disable-FEATURE, default is enabled if available:
gtk gtk UI
vte vte support for the gtk UI
curses curses UI
+ iconv font glyph conversion support
vnc VNC UI support
vnc-sasl SASL encryption for VNC server
vnc-jpeg JPEG lossy compression for VNC server
@@ -3431,7 +3438,51 @@ EOF
fi
##########################################
+# iconv probe
+if test "$iconv" != "no" ; then
+ cat > $TMPC << EOF
+#include <iconv.h>
+int main(void) {
+ iconv_t conv = iconv_open("WCHAR_T", "UCS-2");
+ return conv != (iconv_t) -1;
+}
+EOF
+ iconv_prefix_list="/usr/local:/usr"
+ iconv_lib_list=":-liconv"
+ IFS=:
+ for iconv_prefix in $iconv_prefix_list; do
+ IFS=:
+ iconv_cflags="-I$iconv_prefix/include"
+ iconv_ldflags="-L$iconv_prefix/lib"
+ for iconv_link in $iconv_lib_list; do
+ unset IFS
+ iconv_lib="$iconv_ldflags $iconv_link"
+ echo "looking at iconv in '$iconv_cflags' '$iconv_lib'" >> config.log
+ if compile_prog "$iconv_cflags" "$iconv_lib" ; then
+ iconv_found=yes
+ break
+ fi
+ done
+ if test "$iconv_found" = yes ; then
+ break
+ fi
+ done
+ if test "$iconv_found" = "yes" ; then
+ iconv=yes
+ else
+ if test "$iconv" = "yes" ; then
+ feature_not_found "iconv" "Install iconv devel"
+ fi
+ iconv=no
+ fi
+fi
+
+##########################################
# curses probe
+if test "$iconv" = "no" ; then
+ # curses will need iconv
+ curses=no
+fi
if test "$curses" != "no" ; then
if test "$mingw32" = "yes" ; then
curses_inc_list="$($pkg_config --cflags ncurses 2>/dev/null):"
@@ -3445,14 +3496,17 @@ if test "$curses" != "no" ; then
#include <locale.h>
#include <curses.h>
#include <wchar.h>
+#include <langinfo.h>
int main(void) {
+ const char *codeset;
wchar_t wch = L'w';
setlocale(LC_ALL, "");
resize_term(0, 0);
addwstr(L"wide chars\n");
addnwstr(&wch, 1);
add_wch(WACS_DEGREE);
- return 0;
+ codeset = nl_langinfo(CODESET);
+ return codeset != 0;
}
EOF
IFS=:
@@ -4089,6 +4143,38 @@ if test "$glusterfs" != "no" ; then
glusterfs_fallocate="yes"
glusterfs_zerofill="yes"
fi
+ cat > $TMPC << EOF
+#include <glusterfs/api/glfs.h>
+
+int
+main(void)
+{
+ /* new glfs_ftruncate() passes two additional args */
+ return glfs_ftruncate(NULL, 0, NULL, NULL);
+}
+EOF
+ if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then
+ glusterfs_ftruncate_has_stat="yes"
+ fi
+ cat > $TMPC << EOF
+#include <glusterfs/api/glfs.h>
+
+/* new glfs_io_cbk() passes two additional glfs_stat structs */
+static void
+glusterfs_iocb(glfs_fd_t *fd, ssize_t ret, struct glfs_stat *prestat, struct glfs_stat *poststat, void *data)
+{}
+
+int
+main(void)
+{
+ glfs_io_cbk iocb = &glusterfs_iocb;
+ iocb(NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+EOF
+ if compile_prog "$glusterfs_cflags" "$glusterfs_libs" ; then
+ glusterfs_iocb_has_stat="yes"
+ fi
else
if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support" \
@@ -6210,6 +6296,7 @@ echo "libgcrypt $gcrypt"
echo "nettle $nettle $(echo_version $nettle $nettle_version)"
echo "libtasn1 $tasn1"
echo "PAM $auth_pam"
+echo "iconv support $iconv"
echo "curses support $curses"
echo "virgl support $virglrenderer $(echo_version $virglrenderer $virgl_version)"
echo "curl support $curl"
@@ -6545,6 +6632,11 @@ fi
if test "$cocoa" = "yes" ; then
echo "CONFIG_COCOA=y" >> $config_host_mak
fi
+if test "$iconv" = "yes" ; then
+ echo "CONFIG_ICONV=y" >> $config_host_mak
+ echo "ICONV_CFLAGS=$iconv_cflags" >> $config_host_mak
+ echo "ICONV_LIBS=$iconv_lib" >> $config_host_mak
+fi
if test "$curses" = "yes" ; then
echo "CONFIG_CURSES=m" >> $config_host_mak
echo "CURSES_CFLAGS=$curses_inc" >> $config_host_mak
@@ -6969,6 +7061,14 @@ if test "$glusterfs_zerofill" = "yes" ; then
echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak
fi
+if test "$glusterfs_ftruncate_has_stat" = "yes" ; then
+ echo "CONFIG_GLUSTERFS_FTRUNCATE_HAS_STAT=y" >> $config_host_mak
+fi
+
+if test "$glusterfs_iocb_has_stat" = "yes" ; then
+ echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
+fi
+
if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=m" >> $config_host_mak
echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c
index 545f089587..42660a1b36 100644
--- a/contrib/libvhost-user/libvhost-user-glib.c
+++ b/contrib/libvhost-user/libvhost-user-glib.c
@@ -68,15 +68,16 @@ static GSourceFuncs vug_src_funcs = {
NULL
};
-static GSource *
-vug_source_new(VuDev *dev, int fd, GIOCondition cond,
+GSource *
+vug_source_new(VugDev *gdev, int fd, GIOCondition cond,
vu_watch_cb vu_cb, gpointer data)
{
+ VuDev *dev = &gdev->parent;
GSource *gsrc;
VugSrc *src;
guint id;
- g_assert(dev);
+ g_assert(gdev);
g_assert(fd >= 0);
g_assert(vu_cb);
@@ -106,7 +107,7 @@ set_watch(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb, void *pvt)
g_assert(cb);
dev = container_of(vu_dev, VugDev, parent);
- src = vug_source_new(vu_dev, fd, vu_evt, cb, pvt);
+ src = vug_source_new(dev, fd, vu_evt, cb, pvt);
g_hash_table_replace(dev->fdmap, GINT_TO_POINTER(fd), src);
}
@@ -141,7 +142,7 @@ vug_init(VugDev *dev, int socket,
dev->fdmap = g_hash_table_new_full(NULL, NULL, NULL,
(GDestroyNotify) g_source_destroy);
- dev->src = vug_source_new(&dev->parent, socket, G_IO_IN, vug_watch, NULL);
+ dev->src = vug_source_new(dev, socket, G_IO_IN, vug_watch, NULL);
}
void
diff --git a/contrib/libvhost-user/libvhost-user-glib.h b/contrib/libvhost-user/libvhost-user-glib.h
index 6b2110b94c..d3200f3afc 100644
--- a/contrib/libvhost-user/libvhost-user-glib.h
+++ b/contrib/libvhost-user/libvhost-user-glib.h
@@ -29,4 +29,7 @@ void vug_init(VugDev *dev, int socket,
vu_panic_cb panic, const VuDevIface *iface);
void vug_deinit(VugDev *dev);
+GSource *vug_source_new(VugDev *dev, int fd, GIOCondition cond,
+ vu_watch_cb vu_cb, gpointer data);
+
#endif /* LIBVHOST_USER_GLIB_H */
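
Exporting vug_source_new() lets a backend watch additional file descriptors with the same callback signature the library uses internally. A hypothetical use; the timer fd and callback are made up for illustration:

/* assumes #include "libvhost-user-glib.h" */

static void on_timer_fd(VuDev *dev, int condition, void *data)
{
    /* drain the fd and do periodic work here */
}

static GSource *watch_timer(VugDev *gdev, int timer_fd)
{
    /* the caller is responsible for destroying the returned GSource */
    return vug_source_new(gdev, timer_fd, G_IO_IN, on_timer_fd, NULL);
}
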
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
index 3f14b4138b..e08d6c7b97 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -41,6 +41,8 @@
#endif
#include "qemu/atomic.h"
+#include "qemu/osdep.h"
+#include "qemu/memfd.h"
#include "libvhost-user.h"
@@ -53,6 +55,18 @@
_min1 < _min2 ? _min1 : _min2; })
#endif
+/* Round number down to multiple */
+#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
+
+/* Align each region to cache line size in inflight buffer */
+#define INFLIGHT_ALIGNMENT 64
+
+/* The version of inflight buffer */
+#define INFLIGHT_VERSION 1
+
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* The version of the protocol we support */
@@ -66,6 +80,20 @@
} \
} while (0)
+static inline
+bool has_feature(uint64_t features, unsigned int fbit)
+{
+ assert(fbit < 64);
+ return !!(features & (1ULL << fbit));
+}
+
+static inline
+bool vu_has_feature(VuDev *dev,
+ unsigned int fbit)
+{
+ return has_feature(dev->features, fbit);
+}
+
static const char *
vu_request_to_string(unsigned int req)
{
@@ -100,6 +128,8 @@ vu_request_to_string(unsigned int req)
REQ(VHOST_USER_POSTCOPY_ADVISE),
REQ(VHOST_USER_POSTCOPY_LISTEN),
REQ(VHOST_USER_POSTCOPY_END),
+ REQ(VHOST_USER_GET_INFLIGHT_FD),
+ REQ(VHOST_USER_SET_INFLIGHT_FD),
REQ(VHOST_USER_MAX),
};
#undef REQ
@@ -890,6 +920,91 @@ vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
return true;
}
+static int
+inflight_desc_compare(const void *a, const void *b)
+{
+ VuVirtqInflightDesc *desc0 = (VuVirtqInflightDesc *)a,
+ *desc1 = (VuVirtqInflightDesc *)b;
+
+ if (desc1->counter > desc0->counter &&
+ (desc1->counter - desc0->counter) < VIRTQUEUE_MAX_SIZE * 2) {
+ return 1;
+ }
+
+ return -1;
+}
+
+static int
+vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
+{
+ int i = 0;
+
+ if (!has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (unlikely(!vq->inflight)) {
+ return -1;
+ }
+
+ if (unlikely(!vq->inflight->version)) {
+ /* initialize the buffer */
+ vq->inflight->version = INFLIGHT_VERSION;
+ return 0;
+ }
+
+ vq->used_idx = vq->vring.used->idx;
+ vq->resubmit_num = 0;
+ vq->resubmit_list = NULL;
+ vq->counter = 0;
+
+ if (unlikely(vq->inflight->used_idx != vq->used_idx)) {
+ vq->inflight->desc[vq->inflight->last_batch_head].inflight = 0;
+
+ barrier();
+
+ vq->inflight->used_idx = vq->used_idx;
+ }
+
+ for (i = 0; i < vq->inflight->desc_num; i++) {
+ if (vq->inflight->desc[i].inflight == 1) {
+ vq->inuse++;
+ }
+ }
+
+ vq->shadow_avail_idx = vq->last_avail_idx = vq->inuse + vq->used_idx;
+
+ if (vq->inuse) {
+ vq->resubmit_list = malloc(sizeof(VuVirtqInflightDesc) * vq->inuse);
+ if (!vq->resubmit_list) {
+ return -1;
+ }
+
+ for (i = 0; i < vq->inflight->desc_num; i++) {
+ if (vq->inflight->desc[i].inflight) {
+ vq->resubmit_list[vq->resubmit_num].index = i;
+ vq->resubmit_list[vq->resubmit_num].counter =
+ vq->inflight->desc[i].counter;
+ vq->resubmit_num++;
+ }
+ }
+
+ if (vq->resubmit_num > 1) {
+ qsort(vq->resubmit_list, vq->resubmit_num,
+ sizeof(VuVirtqInflightDesc), inflight_desc_compare);
+ }
+ vq->counter = vq->resubmit_list[0].counter + 1;
+ }
+
+ /* in case of I/O hang after reconnecting */
+ if (eventfd_write(vq->kick_fd, 1)) {
+ return -1;
+ }
+
+ return 0;
+}
+
static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -907,10 +1022,8 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].kick_fd = -1;
}
- if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
- dev->vq[index].kick_fd = vmsg->fds[0];
- DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
- }
+ dev->vq[index].kick_fd = vmsg->fds[0];
+ DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
dev->vq[index].started = true;
if (dev->iface->queue_set_started) {
@@ -925,6 +1038,10 @@ vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].kick_fd, index);
}
+ if (vu_check_queue_inflights(dev, &dev->vq[index])) {
+ vu_panic(dev, "Failed to check inflights for vq: %d\n", index);
+ }
+
return false;
}
@@ -995,8 +1112,11 @@ vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].call_fd = -1;
}
- if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
- dev->vq[index].call_fd = vmsg->fds[0];
+ dev->vq[index].call_fd = vmsg->fds[0];
+
+ /* in case of I/O hang after reconnecting */
+ if (eventfd_write(vmsg->fds[0], 1)) {
+ return -1;
}
DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
@@ -1020,9 +1140,7 @@ vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
dev->vq[index].err_fd = -1;
}
- if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
- dev->vq[index].err_fd = vmsg->fds[0];
- }
+ dev->vq[index].err_fd = vmsg->fds[0];
return false;
}
@@ -1215,6 +1333,116 @@ vu_set_postcopy_end(VuDev *dev, VhostUserMsg *vmsg)
return true;
}
+static inline uint64_t
+vu_inflight_queue_size(uint16_t queue_size)
+{
+ return ALIGN_UP(sizeof(VuDescStateSplit) * queue_size +
+ sizeof(uint16_t), INFLIGHT_ALIGNMENT);
+}
+
+static bool
+vu_get_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int fd;
+ void *addr;
+ uint64_t mmap_size;
+ uint16_t num_queues, queue_size;
+
+ if (vmsg->size != sizeof(vmsg->payload.inflight)) {
+ vu_panic(dev, "Invalid get_inflight_fd message:%d", vmsg->size);
+ vmsg->payload.inflight.mmap_size = 0;
+ return true;
+ }
+
+ num_queues = vmsg->payload.inflight.num_queues;
+ queue_size = vmsg->payload.inflight.queue_size;
+
+ DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
+ DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
+
+ mmap_size = vu_inflight_queue_size(queue_size) * num_queues;
+
+ addr = qemu_memfd_alloc("vhost-inflight", mmap_size,
+ F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+ &fd, NULL);
+
+ if (!addr) {
+ vu_panic(dev, "Failed to alloc vhost inflight area");
+ vmsg->payload.inflight.mmap_size = 0;
+ return true;
+ }
+
+ memset(addr, 0, mmap_size);
+
+ dev->inflight_info.addr = addr;
+ dev->inflight_info.size = vmsg->payload.inflight.mmap_size = mmap_size;
+ dev->inflight_info.fd = vmsg->fds[0] = fd;
+ vmsg->fd_num = 1;
+ vmsg->payload.inflight.mmap_offset = 0;
+
+ DPRINT("send inflight mmap_size: %"PRId64"\n",
+ vmsg->payload.inflight.mmap_size);
+ DPRINT("send inflight mmap offset: %"PRId64"\n",
+ vmsg->payload.inflight.mmap_offset);
+
+ return true;
+}
+
+static bool
+vu_set_inflight_fd(VuDev *dev, VhostUserMsg *vmsg)
+{
+ int fd, i;
+ uint64_t mmap_size, mmap_offset;
+ uint16_t num_queues, queue_size;
+ void *rc;
+
+ if (vmsg->fd_num != 1 ||
+ vmsg->size != sizeof(vmsg->payload.inflight)) {
+ vu_panic(dev, "Invalid set_inflight_fd message size:%d fds:%d",
+ vmsg->size, vmsg->fd_num);
+ return false;
+ }
+
+ fd = vmsg->fds[0];
+ mmap_size = vmsg->payload.inflight.mmap_size;
+ mmap_offset = vmsg->payload.inflight.mmap_offset;
+ num_queues = vmsg->payload.inflight.num_queues;
+ queue_size = vmsg->payload.inflight.queue_size;
+
+ DPRINT("set_inflight_fd mmap_size: %"PRId64"\n", mmap_size);
+ DPRINT("set_inflight_fd mmap_offset: %"PRId64"\n", mmap_offset);
+ DPRINT("set_inflight_fd num_queues: %"PRId16"\n", num_queues);
+ DPRINT("set_inflight_fd queue_size: %"PRId16"\n", queue_size);
+
+ rc = mmap(0, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, mmap_offset);
+
+ if (rc == MAP_FAILED) {
+ vu_panic(dev, "set_inflight_fd mmap error: %s", strerror(errno));
+ return false;
+ }
+
+ if (dev->inflight_info.fd) {
+ close(dev->inflight_info.fd);
+ }
+
+ if (dev->inflight_info.addr) {
+ munmap(dev->inflight_info.addr, dev->inflight_info.size);
+ }
+
+ dev->inflight_info.fd = fd;
+ dev->inflight_info.addr = rc;
+ dev->inflight_info.size = mmap_size;
+
+ for (i = 0; i < num_queues; i++) {
+ dev->vq[i].inflight = (VuVirtqInflight *)rc;
+ dev->vq[i].inflight->desc_num = queue_size;
+ rc = (void *)((char *)rc + vu_inflight_queue_size(queue_size));
+ }
+
+ return false;
+}
+
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
@@ -1285,13 +1513,18 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
case VHOST_USER_SET_CONFIG:
return vu_set_config(dev, vmsg);
case VHOST_USER_NONE:
- break;
+ /* if you need processing before exit, override iface->process_msg */
+ exit(0);
case VHOST_USER_POSTCOPY_ADVISE:
return vu_set_postcopy_advise(dev, vmsg);
case VHOST_USER_POSTCOPY_LISTEN:
return vu_set_postcopy_listen(dev, vmsg);
case VHOST_USER_POSTCOPY_END:
return vu_set_postcopy_end(dev, vmsg);
+ case VHOST_USER_GET_INFLIGHT_FD:
+ return vu_get_inflight_fd(dev, vmsg);
+ case VHOST_USER_SET_INFLIGHT_FD:
+ return vu_set_inflight_fd(dev, vmsg);
default:
vmsg_close_fds(vmsg);
vu_panic(dev, "Unhandled request: %d", vmsg->request);
@@ -1359,8 +1592,24 @@ vu_deinit(VuDev *dev)
close(vq->err_fd);
vq->err_fd = -1;
}
+
+ if (vq->resubmit_list) {
+ free(vq->resubmit_list);
+ vq->resubmit_list = NULL;
+ }
+
+ vq->inflight = NULL;
}
+ if (dev->inflight_info.addr) {
+ munmap(dev->inflight_info.addr, dev->inflight_info.size);
+ dev->inflight_info.addr = NULL;
+ }
+
+ if (dev->inflight_info.fd > 0) {
+ close(dev->inflight_info.fd);
+ dev->inflight_info.fd = -1;
+ }
vu_close_log(dev);
if (dev->slave_fd != -1) {
@@ -1687,20 +1936,6 @@ vu_queue_empty(VuDev *dev, VuVirtq *vq)
return vring_avail_idx(vq) == vq->last_avail_idx;
}
-static inline
-bool has_feature(uint64_t features, unsigned int fbit)
-{
- assert(fbit < 64);
- return !!(features & (1ULL << fbit));
-}
-
-static inline
-bool vu_has_feature(VuDev *dev,
- unsigned int fbit)
-{
- return has_feature(dev->features, fbit);
-}
-
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
@@ -1829,12 +2064,6 @@ virtqueue_map_desc(VuDev *dev,
*p_num_sg = num_sg;
}
-/* Round number down to multiple */
-#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
-
-/* Round number up to multiple */
-#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
-
static void *
virtqueue_alloc_element(size_t sz,
unsigned out_num, unsigned in_num)
@@ -1853,49 +2082,20 @@ virtqueue_alloc_element(size_t sz,
return elem;
}
-void *
-vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+static void *
+vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
{
- unsigned int i, head, max, desc_len;
+ struct vring_desc *desc = vq->vring.desc;
uint64_t desc_addr, read_len;
+ unsigned int desc_len;
+ unsigned int max = vq->vring.num;
+ unsigned int i = idx;
VuVirtqElement *elem;
- unsigned out_num, in_num;
+ unsigned int out_num = 0, in_num = 0;
struct iovec iov[VIRTQUEUE_MAX_SIZE];
struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
- struct vring_desc *desc;
int rc;
- if (unlikely(dev->broken) ||
- unlikely(!vq->vring.avail)) {
- return NULL;
- }
-
- if (vu_queue_empty(dev, vq)) {
- return NULL;
- }
- /* Needed after virtio_queue_empty(), see comment in
- * virtqueue_num_heads(). */
- smp_rmb();
-
- /* When we start there are none of either input nor output. */
- out_num = in_num = 0;
-
- max = vq->vring.num;
- if (vq->inuse >= vq->vring.num) {
- vu_panic(dev, "Virtqueue size exceeded");
- return NULL;
- }
-
- if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
- return NULL;
- }
-
- if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
- vring_set_avail_event(vq, vq->last_avail_idx);
- }
-
- i = head;
- desc = vq->vring.desc;
if (desc[i].flags & VRING_DESC_F_INDIRECT) {
if (desc[i].len % sizeof(struct vring_desc)) {
vu_panic(dev, "Invalid size for indirect buffer table");
@@ -1947,12 +2147,13 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+ vu_panic(dev, "read descriptor error");
return NULL;
}
/* Now copy what we have collected and mapped */
elem = virtqueue_alloc_element(sz, out_num, in_num);
- elem->index = head;
+ elem->index = idx;
for (i = 0; i < out_num; i++) {
elem->out_sg[i] = iov[i];
}
@@ -1960,11 +2161,142 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
elem->in_sg[i] = iov[out_num + i];
}
+ return elem;
+}
+
+static int
+vu_queue_inflight_get(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+ if (!has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (unlikely(!vq->inflight)) {
+ return -1;
+ }
+
+ vq->inflight->desc[desc_idx].counter = vq->counter++;
+ vq->inflight->desc[desc_idx].inflight = 1;
+
+ return 0;
+}
+
+static int
+vu_queue_inflight_pre_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+ if (!has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (unlikely(!vq->inflight)) {
+ return -1;
+ }
+
+ vq->inflight->last_batch_head = desc_idx;
+
+ return 0;
+}
+
+static int
+vu_queue_inflight_post_put(VuDev *dev, VuVirtq *vq, int desc_idx)
+{
+ if (!has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (unlikely(!vq->inflight)) {
+ return -1;
+ }
+
+ barrier();
+
+ vq->inflight->desc[desc_idx].inflight = 0;
+
+ barrier();
+
+ vq->inflight->used_idx = vq->used_idx;
+
+ return 0;
+}
+
+void *
+vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
+{
+ int i;
+ unsigned int head;
+ VuVirtqElement *elem;
+
+ if (unlikely(dev->broken) ||
+ unlikely(!vq->vring.avail)) {
+ return NULL;
+ }
+
+ if (unlikely(vq->resubmit_list && vq->resubmit_num > 0)) {
+ i = (--vq->resubmit_num);
+ elem = vu_queue_map_desc(dev, vq, vq->resubmit_list[i].index, sz);
+
+ if (!vq->resubmit_num) {
+ free(vq->resubmit_list);
+ vq->resubmit_list = NULL;
+ }
+
+ return elem;
+ }
+
+ if (vu_queue_empty(dev, vq)) {
+ return NULL;
+ }
+ /*
+ * Needed after virtio_queue_empty(), see comment in
+ * virtqueue_num_heads().
+ */
+ smp_rmb();
+
+ if (vq->inuse >= vq->vring.num) {
+ vu_panic(dev, "Virtqueue size exceeded");
+ return NULL;
+ }
+
+ if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
+ return NULL;
+ }
+
+ if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ vring_set_avail_event(vq, vq->last_avail_idx);
+ }
+
+ elem = vu_queue_map_desc(dev, vq, head, sz);
+
+ if (!elem) {
+ return NULL;
+ }
+
vq->inuse++;
+ vu_queue_inflight_get(dev, vq, head);
+
return elem;
}
+static void
+vu_queue_detach_element(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
+ size_t len)
+{
+ vq->inuse--;
+ /* unmap, when DMA support is added */
+}
+
+void
+vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
+ size_t len)
+{
+ vq->last_avail_idx--;
+ vu_queue_detach_element(dev, vq, elem, len);
+}
+
bool
vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
{
@@ -2106,5 +2438,7 @@ vu_queue_push(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem, unsigned int len)
{
vu_queue_fill(dev, vq, elem, len, 0);
+ vu_queue_inflight_pre_put(dev, vq, elem->index);
vu_queue_flush(dev, vq, 1);
+ vu_queue_inflight_post_put(dev, vq, elem->index);
}
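
With the hooks above, inflight tracking is transparent to a backend's processing loop: vu_queue_pop() first replays any resubmit_list entries after a reconnect and marks newly popped heads inflight, while vu_queue_push() orders its bookkeeping around the used-ring update. A hypothetical loop, with error handling and the actual I/O elided:

static void process_queue(VuDev *dev, VuVirtq *vq)
{
    VuVirtqElement *elem;

    while ((elem = vu_queue_pop(dev, vq, sizeof(*elem)))) {
        /* perform the request described by elem->out_sg / elem->in_sg */
        vu_queue_push(dev, vq, elem, 0 /* bytes written */);
        free(elem);
    }
    vu_queue_notify(dev, vq);
}
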
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
index 4aa55b4d2d..414ceb0a2f 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -53,6 +53,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_CONFIG = 9,
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -91,6 +92,8 @@ typedef enum VhostUserRequest {
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_GET_INFLIGHT_FD = 31,
+ VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_MAX
} VhostUserRequest;
@@ -138,6 +141,13 @@ typedef struct VhostUserVringArea {
uint64_t offset;
} VhostUserVringArea;
+typedef struct VhostUserInflight {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+ uint16_t num_queues;
+ uint16_t queue_size;
+} VhostUserInflight;
+
#if defined(_WIN32)
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
@@ -145,7 +155,7 @@ typedef struct VhostUserVringArea {
#endif
typedef struct VhostUserMsg {
- VhostUserRequest request;
+ int request;
#define VHOST_USER_VERSION_MASK (0x3)
#define VHOST_USER_REPLY_MASK (0x1 << 2)
@@ -163,6 +173,7 @@ typedef struct VhostUserMsg {
VhostUserLog log;
VhostUserConfig config;
VhostUserVringArea area;
+ VhostUserInflight inflight;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
@@ -234,9 +245,61 @@ typedef struct VuRing {
uint32_t flags;
} VuRing;
+typedef struct VuDescStateSplit {
+ /* Indicate whether this descriptor is inflight or not.
+ * Only available for head-descriptor. */
+ uint8_t inflight;
+
+ /* Padding */
+ uint8_t padding[5];
+
+ /* Maintain a list for the last batch of used descriptors.
+ * Only available when batching is used for submitting. */
+ uint16_t next;
+
+ /* Used to preserve the order of fetching available descriptors.
+ * Only available for head-descriptor. */
+ uint64_t counter;
+} VuDescStateSplit;
+
+typedef struct VuVirtqInflight {
+ /* The feature flags of this region. Now it's initialized to 0. */
+ uint64_t features;
+
+ /* The version of this region. It's 1 currently.
+ * A zero value indicates a VM reset happened. */
+ uint16_t version;
+
+ /* The size of the VuDescStateSplit array. It's equal to the virtqueue
+ * size. The slave can get it from the queue size field of VhostUserInflight. */
+ uint16_t desc_num;
+
+ /* The head of the list that tracks the last batch of used descriptors. */
+ uint16_t last_batch_head;
+
+ /* Storing the idx value of used ring */
+ uint16_t used_idx;
+
+ /* Used to track the state of each descriptor in descriptor table */
+ VuDescStateSplit desc[0];
+} VuVirtqInflight;
+
+typedef struct VuVirtqInflightDesc {
+ uint16_t index;
+ uint64_t counter;
+} VuVirtqInflightDesc;
+
typedef struct VuVirtq {
VuRing vring;
+ VuVirtqInflight *inflight;
+
+ VuVirtqInflightDesc *resubmit_list;
+
+ uint16_t resubmit_num;
+
+ uint64_t counter;
+
/* Next head to pop */
uint16_t last_avail_idx;
@@ -279,11 +342,18 @@ typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
vu_watch_cb cb, void *data);
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
+typedef struct VuDevInflightInfo {
+ int fd;
+ void *addr;
+ uint64_t size;
+} VuDevInflightInfo;
+
struct VuDev {
int sock;
uint32_t nregions;
VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
+ VuDevInflightInfo inflight_info;
int log_call_fd;
int slave_fd;
uint64_t log_size;
@@ -458,6 +528,20 @@ void vu_queue_notify(VuDev *dev, VuVirtq *vq);
*/
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
+
+/**
+ * vu_queue_unpop:
+ * @dev: a VuDev context
+ * @vq: a VuVirtq queue
+ * @elem: The #VuVirtqElement
+ * @len: number of bytes written
+ *
+ * Pretend the most recent element wasn't popped from the virtqueue. The next
+ * call to vu_queue_pop() will refetch the element.
+ */
+void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
+ size_t len);
+
/**
* vu_queue_rewind:
* @dev: a VuDev context
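
For reference, the per-queue inflight region size shared by master and slave is the formula from vu_inflight_queue_size() in libvhost-user.c; a standalone restatement, where the 16-byte element size follows from the VuDescStateSplit layout above:

#include <stdint.h>

#define INFLIGHT_ALIGNMENT 64
#define ALIGN_UP(n, m) (((n) + (m) - 1) / (m) * (m))

/* sizeof(VuDescStateSplit): 1 + 5 (padding) + 2 + 8 = 16 bytes */
#define DESC_STATE_SPLIT_SIZE 16

static uint64_t inflight_queue_region_size(uint16_t queue_size)
{
    return ALIGN_UP((uint64_t)DESC_STATE_SPLIT_SIZE * queue_size +
                    sizeof(uint16_t), INFLIGHT_ALIGNMENT);
}

/* Total mmap size is this value multiplied by the number of queues. */
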
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 8c3098d8d9..af5711e533 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -633,7 +633,10 @@ Structure of a bitmap directory entry:
Bit
0: in_use
The bitmap was not saved correctly and may be
- inconsistent.
+ inconsistent. Although the bitmap metadata is still
+ well-formed from a qcow2 perspective, the metadata
+ (such as the auto flag or bitmap size) or data
+ contents may be outdated.
1: auto
The bitmap must reflect all changes of the virtual
@@ -761,8 +764,8 @@ corresponding range of the virtual disk (see above) was written to while the
bitmap was 'enabled'. An unset bit means that this range was not written to.
The software doesn't have to sync the bitmap in the image file with its
-representation in RAM after each write. Flag 'in_use' should be set while the
-bitmap is not synced.
+representation in RAM after each write or metadata change. The 'in_use' flag
+should be set while the bitmap is not synced.
In the image file the 'enabled' state is reflected by the 'auto' flag. If this
flag is set, the software must consider the bitmap as 'enabled' and start
diff --git a/docs/interop/vhost-user.json b/docs/interop/vhost-user.json
new file mode 100644
index 0000000000..ae88c03117
--- /dev/null
+++ b/docs/interop/vhost-user.json
@@ -0,0 +1,232 @@
+# -*- Mode: Python -*-
+#
+# Copyright (C) 2018 Red Hat, Inc.
+#
+# Authors:
+# Marc-André Lureau <marcandre.lureau@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later. See the COPYING file in the top-level directory.
+
+##
+# = vhost user backend discovery & capabilities
+##
+
+##
+# @VHostUserBackendType:
+#
+# List the various vhost user backend types.
+#
+# @9p: 9p virtio console
+# @balloon: virtio balloon
+# @block: virtio block
+# @caif: virtio caif
+# @console: virtio console
+# @crypto: virtio crypto
+# @gpu: virtio gpu
+# @input: virtio input
+# @net: virtio net
+# @rng: virtio rng
+# @rpmsg: virtio remote processor messaging
+# @rproc-serial: virtio remoteproc serial link
+# @scsi: virtio scsi
+# @vsock: virtio vsock transport
+#
+# Since: 4.0
+##
+{
+ 'enum': 'VHostUserBackendType',
+ 'data': [
+ '9p',
+ 'balloon',
+ 'block',
+ 'caif',
+ 'console',
+ 'crypto',
+ 'gpu',
+ 'input',
+ 'net',
+ 'rng',
+ 'rpmsg',
+ 'rproc-serial',
+ 'scsi',
+ 'vsock'
+ ]
+}
+
+##
+# @VHostUserBackendInputFeature:
+#
+# List of vhost user "input" features.
+#
+# @evdev-path: The --evdev-path command line option is supported.
+# @no-grab: The --no-grab command line option is supported.
+#
+# Since: 4.0
+##
+{
+ 'enum': 'VHostUserBackendInputFeature',
+ 'data': [ 'evdev-path', 'no-grab' ]
+}
+
+##
+# @VHostUserBackendCapabilitiesInput:
+#
+# Capabilities reported by vhost user "input" backends
+#
+# @features: list of supported features.
+#
+# Since: 4.0
+##
+{
+ 'struct': 'VHostUserBackendCapabilitiesInput',
+ 'data': {
+ 'features': [ 'VHostUserBackendInputFeature' ]
+ }
+}
+
+##
+# @VHostUserBackendGPUFeature:
+#
+# List of vhost user "gpu" features.
+#
+# @render-node: The --render-node command line option is supported.
+# @virgl: The --virgl command line option is supported.
+#
+# Since: 4.0
+##
+{
+ 'enum': 'VHostUserBackendGPUFeature',
+ 'data': [ 'render-node', 'virgl' ]
+}
+
+##
+# @VHostUserBackendCapabilitiesGPU:
+#
+# Capabilities reported by vhost user "gpu" backends.
+#
+# @features: list of supported features.
+#
+# Since: 4.0
+##
+{
+ 'struct': 'VHostUserBackendCapabilitiesGPU',
+ 'data': {
+ 'features': [ 'VHostUserBackendGPUFeature' ]
+ }
+}
+
+##
+# @VHostUserBackendCapabilities:
+#
+# Capabilities reported by vhost user backends.
+#
+# @type: The vhost user backend type.
+#
+# Since: 4.0
+##
+{
+ 'union': 'VHostUserBackendCapabilities',
+ 'base': { 'type': 'VHostUserBackendType' },
+ 'discriminator': 'type',
+ 'data': {
+ 'input': 'VHostUserBackendCapabilitiesInput',
+ 'gpu': 'VHostUserBackendCapabilitiesGPU'
+ }
+}
+
+##
+# @VhostUserBackend:
+#
+# Describes a vhost user backend to management software.
+#
+# It is possible for multiple @VhostUserBackend elements to match the
+# search criteria of management software. Applications thus need rules
+# to pick one of the many matches, and users need the ability to
+# override distro defaults.
+#
+# It is recommended to create vhost user backend JSON files (each
+# containing a single @VhostUserBackend root element) with a
+# double-digit prefix, for example "50-qemu-gpu.json",
+# "50-crosvm-gpu.json", etc, so they can be sorted in predictable
+# order. The backend JSON files should be searched for in three
+# directories:
+#
+# - /usr/share/qemu/vhost-user -- populated by distro-provided
+# packages (XDG_DATA_DIRS covers
+# /usr/share by default),
+#
+# - /etc/qemu/vhost-user -- exclusively for sysadmins' local additions,
+#
+# - $XDG_CONFIG_HOME/qemu/vhost-user -- exclusively for per-user local
+# additions (XDG_CONFIG_HOME
+# defaults to $HOME/.config).
+#
+# Top-down, the list of directories goes from general to specific.
+#
+# Management software should build a list of files from all three
+# locations, then sort the list by filename (i.e., basename
+# component). Management software should choose the first JSON file on
+# the sorted list that matches the search criteria. If a more specific
+# directory has a file with the same name as a less specific directory,
+# then the file in the more specific directory takes effect. If the
+# more specific file is zero length, it hides the less specific one.
+#
+# For example, if a distro ships
+#
+# - /usr/share/qemu/vhost-user/50-qemu-gpu.json
+#
+# - /usr/share/qemu/vhost-user/50-crosvm-gpu.json
+#
+# then the sysadmin can prevent the default QEMU GPU from being used at all with
+#
+# $ touch /etc/qemu/vhost-user/50-qemu-gpu.json
+#
+# The sysadmin can replace/alter the distro default QEMU GPU with
+#
+# $ vim /etc/qemu/vhost-user/50-qemu-gpu.json
+#
+# or they can provide a parallel QEMU GPU with higher priority
+#
+# $ vim /etc/qemu/vhost-user/10-qemu-gpu.json
+#
+# or they can provide a parallel QEMU GPU with lower priority
+#
+# $ vim /etc/qemu/vhost-user/99-qemu-gpu.json
+#
+# @type: The vhost user backend type.
+#
+# @description: Provides a human-readable description of the backend.
+# Management software may or may not display @description.
+#
+# @binary: Absolute path to the backend binary.
+#
+# @tags: An optional list of auxiliary strings associated with the
+# backend for which @description is not appropriate, due to the
+# latter's possible exposure to the end-user. @tags serves
+# development and debugging purposes only, and management
+# software shall explicitly ignore it.
+#
+# Since: 4.0
+#
+# Example:
+#
+# {
+# "description": "QEMU vhost-user-gpu",
+# "type": "gpu",
+# "binary": "/usr/libexec/qemu/vhost-user-gpu",
+# "tags": [
+# "CONFIG_OPENGL_DMABUF=y"
+# ]
+# }
+#
+##
+{
+ 'struct' : 'VhostUserBackend',
+ 'data' : {
+ 'description': 'str',
+ 'type': 'VHostUserBackendType',
+ 'binary': 'str',
+ '*tags': [ 'str' ]
+ }
+}
diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index c2194711d9..4dbd530cb9 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -17,8 +17,13 @@ The protocol defines 2 sides of the communication, master and slave. Master is
the application that shares its virtqueues, in our case QEMU. Slave is the
consumer of the virtqueues.
-In the current implementation QEMU is the Master, and the Slave is intended to
-be a software Ethernet switch running in user space, such as Snabbswitch.
+In the current implementation QEMU is the Master, and the Slave is the
+external process consuming the virtio queues, for example a software
+Ethernet switch running in user space, such as Snabbswitch, or a block
+device backend processing reads and writes to a virtual disk. In order to
+facilitate interoperability between various backend implementations,
+it is recommended to follow the "Backend program conventions"
+described in this document.
Master and slave can be either a client (i.e. connecting) or server (listening)
in the socket communication.
@@ -142,6 +147,17 @@ Depending on the request type, payload can be:
Offset: a 64-bit offset of this area from the start of the
supplied file descriptor
+ * Inflight description
+ -----------------------------------------------------
+ | mmap size | mmap offset | num queues | queue size |
+ -----------------------------------------------------
+
+ mmap size: a 64-bit size of the area used to track inflight I/O
+ mmap offset: a 64-bit offset of this area from the start
+ of the supplied file descriptor
+ num queues: a 16-bit number of virtqueues
+ queue size: a 16-bit size of each virtqueue
+
In QEMU the vhost-user message is implemented with the following struct:
typedef struct VhostUserMsg {
@@ -157,6 +173,7 @@ typedef struct VhostUserMsg {
struct vhost_iotlb_msg iotlb;
VhostUserConfig config;
VhostUserVringArea area;
+ VhostUserInflight inflight;
};
} QEMU_PACKED VhostUserMsg;
@@ -175,6 +192,7 @@ the ones that do:
* VHOST_USER_GET_PROTOCOL_FEATURES
* VHOST_USER_GET_VRING_BASE
* VHOST_USER_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD)
+ * VHOST_USER_GET_INFLIGHT_FD (if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)
[ Also see the section on REPLY_ACK protocol extension. ]
@@ -188,6 +206,7 @@ in the ancillary data:
* VHOST_USER_SET_VRING_CALL
* VHOST_USER_SET_VRING_ERR
* VHOST_USER_SET_SLAVE_REQ_FD
+ * VHOST_USER_SET_INFLIGHT_FD (if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)
If Master is unable to send the full message or receives a wrong reply it will
close the connection. An optional reconnection mechanism can be implemented.
@@ -382,6 +401,256 @@ If VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD protocol feature is negotiated,
slave can send file descriptors (at most 8 descriptors in each message)
to master via ancillary data using this fd communication channel.
+Inflight I/O tracking
+---------------------
+
+To support reconnecting after a restart or crash, the slave may need to
+resubmit inflight I/Os. If the virtqueue is processed in order, this is easy
+to achieve by reading the inflight descriptors back from the descriptor table
+(split virtqueue) or descriptor ring (packed virtqueue). However, this does
+not work when descriptors are processed out of order, because the entries in
+the available ring (split virtqueue) or descriptor ring (packed virtqueue)
+that describe inflight descriptors might be overwritten by new entries. To
+solve this problem, the slave needs to allocate an extra buffer to store this
+inflight-descriptor information and share it with the master so that it
+persists across reconnects. VHOST_USER_GET_INFLIGHT_FD and
+VHOST_USER_SET_INFLIGHT_FD are used to transfer this buffer between master
+and slave. The format of the buffer is described below:
+
+-------------------------------------------------------
+| queue0 region | queue1 region | ... | queueN region |
+-------------------------------------------------------
+
+N is the number of available virtqueues. The slave can get it from the num
+queues field of VhostUserInflight.
+
+For split virtqueue, queue region can be implemented as:
+
+typedef struct DescStateSplit {
+ /* Indicate whether this descriptor is inflight or not.
+ * Only available for head-descriptor. */
+ uint8_t inflight;
+
+ /* Padding */
+ uint8_t padding[5];
+
+ /* Maintain a list for the last batch of used descriptors.
+ * Only available when batching is used for submitting. */
+ uint16_t next;
+
+ /* Used to preserve the order of fetching available descriptors.
+ * Only available for head-descriptor. */
+ uint64_t counter;
+} DescStateSplit;
+
+typedef struct QueueRegionSplit {
+ /* The feature flags of this region. Now it's initialized to 0. */
+ uint64_t features;
+
+ /* The version of this region. It's 1 currently.
+ * Zero value indicates an uninitialized buffer */
+ uint16_t version;
+
+ /* The size of the DescStateSplit array. It's equal to the virtqueue
+ * size. The slave can get it from the queue size field of VhostUserInflight. */
+ uint16_t desc_num;
+
+ /* The head of the list that tracks the last batch of used descriptors. */
+ uint16_t last_batch_head;
+
+ /* Store the idx value of used ring */
+ uint16_t used_idx;
+
+ /* Used to track the state of each descriptor in descriptor table */
+ DescStateSplit desc[0];
+} QueueRegionSplit;
+
+To track inflight I/O, the queue region should be processed as follows:
+
+When receiving available buffers from the driver:
+
+ 1. Get the next available head-descriptor index from available ring, i
+
+ 2. Set desc[i].counter to the value of global counter
+
+ 3. Increase global counter by 1
+
+ 4. Set desc[i].inflight to 1
+
+When supplying used buffers to the driver:
+
+ 1. Get corresponding used head-descriptor index, i
+
+ 2. Set desc[i].next to last_batch_head
+
+ 3. Set last_batch_head to i
+
+ 4. Steps 1,2,3 may be performed repeatedly if batching is possible
+
+ 5. Increase the idx value of used ring by the size of the batch
+
+ 6. Set the inflight field of each DescStateSplit entry in the batch to 0
+
+ 7. Set used_idx to the idx value of used ring
+
+When reconnecting:
+
+ 1. If the value of used_idx does not match the idx value of used ring (meaning
+ the inflight field of the DescStateSplit entries in the last batch may be
+ incorrect),
+
+ (a) Subtract the value of used_idx from the idx value of used ring to get
+ last batch size of DescStateSplit entries
+
+ (b) Set the inflight field of each DescStateSplit entry to 0 in last batch
+ list which starts from last_batch_head
+
+ (c) Set used_idx to the idx value of used ring
+
+ 2. Resubmit inflight DescStateSplit entries in order of their counter value
+
+For packed virtqueue, queue region can be implemented as:
+
+typedef struct DescStatePacked {
+ /* Indicate whether this descriptor is inflight or not.
+ * Only available for head-descriptor. */
+ uint8_t inflight;
+
+ /* Padding */
+ uint8_t padding;
+
+ /* Link to the next free entry */
+ uint16_t next;
+
+ /* Link to the last entry of descriptor list.
+ * Only available for head-descriptor. */
+ uint16_t last;
+
+ /* The length of descriptor list.
+ * Only available for head-descriptor. */
+ uint16_t num;
+
+ /* Used to preserve the order of fetching available descriptors.
+ * Only available for head-descriptor. */
+ uint64_t counter;
+
+ /* The buffer id */
+ uint16_t id;
+
+ /* The descriptor flags */
+ uint16_t flags;
+
+ /* The buffer length */
+ uint32_t len;
+
+ /* The buffer address */
+ uint64_t addr;
+} DescStatePacked;
+
+typedef struct QueueRegionPacked {
+ /* The feature flags of this region. Now it's initialized to 0. */
+ uint64_t features;
+
+ /* The version of this region. It's 1 currently.
+ * Zero value indicates an uninitialized buffer */
+ uint16_t version;
+
+ /* The size of the DescStatePacked array. It's equal to the virtqueue
+ * size. The slave can get it from the queue size field of VhostUserInflight. */
+ uint16_t desc_num;
+
+ /* The head of free DescStatePacked entry list */
+ uint16_t free_head;
+
+ /* The old head of free DescStatePacked entry list */
+ uint16_t old_free_head;
+
+ /* The used index of descriptor ring */
+ uint16_t used_idx;
+
+ /* The old used index of descriptor ring */
+ uint16_t old_used_idx;
+
+ /* Device ring wrap counter */
+ uint8_t used_wrap_counter;
+
+ /* The old device ring wrap counter */
+ uint8_t old_used_wrap_counter;
+
+ /* Padding */
+ uint8_t padding[7];
+
+ /* Used to track the state of each descriptor fetched from the descriptor ring */
+ DescStatePacked desc[0];
+} QueueRegionPacked;
+
+To track inflight I/O, the queue region should be processed as follows:
+
+When receiving available buffers from the driver:
+
+ 1. Get the next available descriptor entry, d, from the descriptor ring
+
+ 2. If d is a head descriptor,
+
+ (a) Set desc[old_free_head].num to 0
+
+ (b) Set desc[old_free_head].counter to the value of global counter
+
+ (c) Increase global counter by 1
+
+ (d) Set desc[old_free_head].inflight to 1
+
+ 3. If d is the last descriptor, set desc[old_free_head].last to free_head
+
+ 4. Increase desc[old_free_head].num by 1
+
+ 5. Set desc[free_head].addr, desc[free_head].len, desc[free_head].flags,
+ desc[free_head].id to d.addr, d.len, d.flags, d.id
+
+ 6. Set free_head to desc[free_head].next
+
+ 7. If d is the last descriptor, set old_free_head to free_head (the whole
+ procedure is transcribed in the sketch below)
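+
+A non-normative transcription of these steps in C (the function name is
+hypothetical; global_counter is the same fetch-order counter used for the
+split queue, and struct vring_packed_desc comes from linux/virtio_ring.h):
+
+static void inflight_avail_packed(QueueRegionPacked *q,
+                                  struct vring_packed_desc *d,
+                                  bool is_head, bool is_last)
+{
+    if (is_head) {
+        q->desc[q->old_free_head].num = 0;
+        q->desc[q->old_free_head].counter = global_counter++;
+        q->desc[q->old_free_head].inflight = 1;
+    }
+    if (is_last) {
+        q->desc[q->old_free_head].last = q->free_head;
+    }
+    q->desc[q->old_free_head].num++;
+
+    q->desc[q->free_head].addr = d->addr;
+    q->desc[q->free_head].len = d->len;
+    q->desc[q->free_head].flags = d->flags;
+    q->desc[q->free_head].id = d->id;
+
+    q->free_head = q->desc[q->free_head].next;
+
+    if (is_last) {
+        q->old_free_head = q->free_head;
+    }
+}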
+
+When supplying used buffers to the driver:
+
+ 1. Get the corresponding used head-descriptor entry, d, from the descriptor ring
+
+ 2. Get the corresponding DescStatePacked entry, e
+
+ 3. Set desc[e.last].next to free_head
+
+ 4. Set free_head to the index of e
+
+ 5. Steps 1, 2, 3 and 4 may be performed repeatedly if batching is possible
+
+ 6. Increase used_idx by the size of the batch and update used_wrap_counter if needed
+
+ 7. Update d.flags
+
+ 8. Set the inflight field of each head DescStatePacked entry in the batch to 0
+
+ 9. Set old_free_head, old_used_idx, old_used_wrap_counter to free_head, used_idx,
+ used_wrap_counter
+
+When reconnecting:
+
+ 1. If used_idx does not match old_used_idx (meaning the inflight field of the
+ DescStatePacked entries in the last batch may be incorrect),
+
+ (a) Get the next descriptor ring entry, d, through old_used_idx
+
+ (b) Use old_used_wrap_counter to calculate the available flags
+
+ (c) If d.flags is not equal to the calculated flags value (meaning the slave
+ submitted the buffer to the guest driver before the crash, so the
+ in-progress update has to be committed), set old_free_head, old_used_idx,
+ old_used_wrap_counter to free_head, used_idx, used_wrap_counter
+
+ 2. Set free_head, used_idx, used_wrap_counter to old_free_head, old_used_idx,
+ old_used_wrap_counter (roll back any in-progress update)
+
+ 3. Set the inflight field of each DescStatePacked entry in free list to 0
+
+ 4. Resubmit inflight DescStatePacked entries in order of their counter value
+ (see the reconnect sketch below)
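+
+The reconnect procedure can be sketched as follows (non-normative;
+endianness and error handling are omitted, the VRING_PACKED_DESC_F_*
+bit positions come from linux/virtio_ring.h, and the final free-list
+walk and resubmission are left abstract):
+
+static void inflight_reconnect_packed(QueueRegionPacked *q,
+                                      struct vring_packed_desc *ring)
+{
+    uint16_t avail_used = (1 << VRING_PACKED_DESC_F_AVAIL) |
+                          (1 << VRING_PACKED_DESC_F_USED);
+
+    if (q->used_idx != q->old_used_idx) {
+        struct vring_packed_desc *d = &ring[q->old_used_idx];
+        /* Flags the entry would carry if it were still only available:
+         * AVAIL equals the wrap counter, USED equals its inverse */
+        uint16_t avail_flags = q->old_used_wrap_counter ?
+                               (1 << VRING_PACKED_DESC_F_AVAIL) :
+                               (1 << VRING_PACKED_DESC_F_USED);
+
+        if ((d->flags & avail_used) != avail_flags) {
+            /* The buffer reached the guest driver before the crash,
+             * so commit the in-progress update */
+            q->old_free_head = q->free_head;
+            q->old_used_idx = q->used_idx;
+            q->old_used_wrap_counter = q->used_wrap_counter;
+        }
+    }
+
+    /* Roll back (or keep, if committed above) the batched state */
+    q->free_head = q->old_free_head;
+    q->used_idx = q->old_used_idx;
+    q->used_wrap_counter = q->old_used_wrap_counter;
+
+    /* Then clear inflight on every entry in the free list and
+     * resubmit the remaining inflight entries in counter order */
+}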
+
Protocol features
-----------------
@@ -397,6 +666,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_CONFIG 9
#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
Master message types
--------------------
@@ -761,6 +1031,26 @@ Master message types
was previously sent.
The value returned is an error indication; 0 is success.
+ * VHOST_USER_GET_INFLIGHT_FD
+ Id: 31
+ Equivalent ioctl: N/A
+ Master payload: inflight description
+
+ When the VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD protocol feature has been
+ successfully negotiated, this message is submitted by the master to get
+ a shared buffer from the slave. The slave will use the shared buffer to
+ track inflight I/O. QEMU should retrieve a new one on VM reset.
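+
+ The slave can back this buffer with anonymous shared memory, e.g. with
+ memfd on Linux (a minimal sketch; queue_region_size() is a hypothetical
+ helper returning the region size for one queue, and error handling is
+ omitted):
+
+ #define _GNU_SOURCE
+ #include <sys/mman.h>
+ #include <unistd.h>
+
+ static void *alloc_inflight(uint16_t num_queues, uint16_t queue_size,
+                             int *fd, uint64_t *mmap_size)
+ {
+     *mmap_size = num_queues * queue_region_size(queue_size);
+     *fd = memfd_create("vhost-inflight", MFD_CLOEXEC);
+     ftruncate(*fd, *mmap_size);
+
+     /* mmap_offset in the reply payload is 0 here; the fd itself is
+      * returned to the master as ancillary (SCM_RIGHTS) data */
+     return mmap(NULL, *mmap_size, PROT_READ | PROT_WRITE,
+                 MAP_SHARED, *fd, 0);
+ }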
+
+ * VHOST_USER_SET_INFLIGHT_FD
+ Id: 32
+ Equivalent ioctl: N/A
+ Master payload: inflight description
+
+ When the VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD protocol feature has been
+ successfully negotiated, this message is submitted by the master to send
+ the shared inflight buffer back to the slave so that the slave can
+ recover inflight I/O after a crash or restart.
+
Slave message types
-------------------
@@ -835,3 +1125,95 @@ resilient for selective requests.
For the message types that already solicit a reply from the client, the
presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set brings
no behavioural change. (See the 'Communication' section for details.)
+
+Backend program conventions
+---------------------------
+
+vhost-user backends can provide various devices & services and may
+need to be configured manually depending on the use case. However, it
+is a good idea to follow the conventions listed here when
+possible. Users such as QEMU or libvirt can then rely on some common
+behaviour, avoiding heterogeneous configuration and management of the
+backend programs and facilitating interoperability.
+
+Each backend installed on a host system should come with at least one
+JSON file that conforms to the vhost-user.json schema. Each file
+informs the management applications about the backend type and binary
+location. In addition, it defines rules for management apps to pick
+the highest-priority backend when multiple backends match the search
+criteria (see the @VhostUserBackend documentation in the schema file).
+
+If the backend is not capable of enabling a requested feature on the
+host (such as 3D acceleration with virgl), or the initialization
+failed, the backend should fail to start early and exit with a status
+!= 0. It may also print a message to stderr giving further details.
+
+The backend program must not daemonize itself, but it may be
+daemonized by the management layer. It may also have restricted
+access to the system.
+
+File descriptors 0, 1 and 2 will exist, and have regular
+stdin/stdout/stderr usage (they may have been redirected to /dev/null
+by the management layer, or to a log handler).
+
+The backend program must end (as quickly and cleanly as possible) when
+the SIGTERM signal is received. It may eventually be sent SIGKILL by
+the management layer after a few seconds.
+
+The following command line options have an expected behaviour. They
+are mandatory, unless explicitly stated otherwise:
+
+* --socket-path=PATH
+
+This option specifies the location of the vhost-user Unix domain socket.
+It is incompatible with --fd.
+
+* --fd=FDNUM
+
+When this argument is given, the backend program is started with the
+vhost-user socket as file descriptor FDNUM. It is incompatible with
+--socket-path.
+
+* --print-capabilities
+
+Output to stdout the backend capabilities in JSON format, and then
+exit successfully. Other options and arguments should be ignored, and
+the backend program should not perform its normal function. The
+capabilities can be reported dynamically depending on the host
+capabilities.
+
+The JSON output is described in the vhost-user.json schema, by
+@VHostUserBackendCapabilities. Example:
+{
+ "type": "foo",
+ "features": [
+ "feature-a",
+ "feature-b"
+ ]
+}
+
+vhost-user-input
+----------------
+
+Command line options:
+
+* --evdev-path=PATH (optional)
+
+Specify the Linux input device.
+
+* --no-grab (optional)
+
+Do not request exclusive access to the input device.
+
+vhost-user-gpu
+--------------
+
+Command line options:
+
+* --render-node=PATH (optional)
+
+Specify the GPU DRM render node.
+
+* --virgl (optional)
+
+Enable virgl rendering support.
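+
+For example, a management layer might start the GPU backend as follows
+(hypothetical socket and render-node paths):
+
+  vhost-user-gpu --socket-path=/tmp/vgpu.sock \
+                 --render-node=/dev/dri/renderD128 --virgl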
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index c5d8646abc..e53dfe1ee3 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -483,13 +483,24 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp)
NULL);
}
+void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
+ !lpc->pm.acpi_memory_hotplug.is_enabled)
+ error_setg(errp,
+ "memory hotplug is not enabled: %s.memory-hotplug-support "
+ "is not set", object_get_typename(OBJECT(lpc)));
+}
+
void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
- if (lpc->pm.acpi_memory_hotplug.is_enabled &&
- object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
nvdimm_acpi_plug_cb(hotplug_dev, dev);
} else {
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index f73cfb9d90..9fdad6dc3f 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -992,7 +992,7 @@ static void nvdimm_build_common_dsm(Aml *dev)
field = aml_field(NVDIMM_DSM_IOPORT, AML_DWORD_ACC, AML_NOLOCK,
AML_PRESERVE);
aml_append(field, aml_named_field(NVDIMM_DSM_NOTIFY,
- sizeof(uint32_t) * BITS_PER_BYTE));
+ NVDIMM_ACPI_IO_LEN * BITS_PER_BYTE));
aml_append(method, field);
/*
@@ -1086,7 +1086,7 @@ static void nvdimm_build_common_dsm(Aml *dev)
*/
aml_append(method, aml_store(handle, aml_name(NVDIMM_DSM_HANDLE)));
aml_append(method, aml_store(aml_arg(1), aml_name(NVDIMM_DSM_REVISION)));
- aml_append(method, aml_store(aml_arg(2), aml_name(NVDIMM_DSM_FUNCTION)));
+ aml_append(method, aml_store(function, aml_name(NVDIMM_DSM_FUNCTION)));
/*
* The fourth parameter (Arg3) of _DSM is a package which contains
@@ -1260,7 +1260,7 @@ static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots)
}
static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data,
- BIOSLinker *linker, GArray *dsm_dma_arrea,
+ BIOSLinker *linker, GArray *dsm_dma_area,
uint32_t ram_slots)
{
Aml *ssdt, *sb_scope, *dev;
@@ -1307,7 +1307,7 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data,
NVDIMM_ACPI_MEM_ADDR);
bios_linker_loader_alloc(linker,
- NVDIMM_DSM_MEM_FILE, dsm_dma_arrea,
+ NVDIMM_DSM_MEM_FILE, dsm_dma_area,
sizeof(NvdimmDsmIn), false /* high memory */);
bios_linker_loader_add_pointer(linker,
ACPI_BUILD_TABLE_FILE, mem_addr_offset, sizeof(uint32_t),
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 7b98121070..9c079d6834 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -380,9 +380,17 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque)
static void piix4_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
+ PIIX4PMState *s = PIIX4_PM(hotplug_dev);
+
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
- } else if (!object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) &&
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ if (!s->acpi_memory_hotplug.is_enabled) {
+ error_setg(errp,
+ "memory hotplug is not enabled: %s.memory-hotplug-support "
+ "is not set", object_get_typename(OBJECT(s)));
+ }
+ } else if (
!object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
error_setg(errp, "acpi: device pre plug request for not supported"
" device type: %s", object_get_typename(OBJECT(dev)));
@@ -394,8 +402,7 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev,
{
PIIX4PMState *s = PIIX4_PM(hotplug_dev);
- if (s->acpi_memory_hotplug.is_enabled &&
- object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
nvdimm_acpi_plug_cb(hotplug_dev, dev);
} else {
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 8325b5e88a..7caf92532a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -324,8 +324,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
- uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
- uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
+ uint64_t offset = slba << data_shift;
+ uint32_t count = nlb << data_shift;
if (unlikely(slba + nlb > ns->id_ns.nsze)) {
trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
@@ -335,7 +335,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
req->has_sg = false;
block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
BLOCK_ACCT_WRITE);
- req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, aio_slba, aio_nlb,
+ req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
BDRV_REQ_MAY_UNMAP, nvme_rw_cb, req);
return NVME_NO_COMPLETE;
}
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 44ac814016..28b81368f7 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -128,6 +128,21 @@ static void vhost_user_blk_start(VirtIODevice *vdev)
}
s->dev.acked_features = vdev->guest_features;
+
+ if (!s->inflight->addr) {
+ ret = vhost_dev_get_inflight(&s->dev, s->queue_size, s->inflight);
+ if (ret < 0) {
+ error_report("Error get inflight: %d", -ret);
+ goto err_guest_notifiers;
+ }
+ }
+
+ ret = vhost_dev_set_inflight(&s->dev, s->inflight);
+ if (ret < 0) {
+ error_report("Error set inflight: %d", -ret);
+ goto err_guest_notifiers;
+ }
+
ret = vhost_dev_start(&s->dev, vdev);
if (ret < 0) {
error_report("Error starting vhost: %d", -ret);
@@ -249,11 +264,17 @@ static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
}
}
+static void vhost_user_blk_reset(VirtIODevice *vdev)
+{
+ VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+ vhost_dev_free_inflight(s->inflight);
+}
+
static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VHostUserBlk *s = VHOST_USER_BLK(vdev);
- VhostUserState *user;
struct vhost_virtqueue *vqs = NULL;
int i, ret;
@@ -272,15 +293,10 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
- user = vhost_user_init();
- if (!user) {
- error_setg(errp, "vhost-user-blk: failed to init vhost_user");
+ if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
return;
}
- user->chr = &s->chardev;
- s->vhost_user = user;
-
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
@@ -289,6 +305,8 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
vhost_user_blk_handle_output);
}
+ s->inflight = g_new0(struct vhost_inflight, 1);
+
s->dev.nvqs = s->num_queues;
s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
s->dev.vq_index = 0;
@@ -297,7 +315,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
vhost_dev_set_config_notifier(&s->dev, &blk_ops);
- ret = vhost_dev_init(&s->dev, s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
+ ret = vhost_dev_init(&s->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0);
if (ret < 0) {
error_setg(errp, "vhost-user-blk: vhost initialization failed: %s",
strerror(-ret));
@@ -321,11 +339,9 @@ vhost_err:
vhost_dev_cleanup(&s->dev);
virtio_err:
g_free(vqs);
+ g_free(s->inflight);
virtio_cleanup(vdev);
-
- vhost_user_cleanup(user);
- g_free(user);
- s->vhost_user = NULL;
+ vhost_user_cleanup(&s->vhost_user);
}
static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
@@ -336,14 +352,11 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp)
vhost_user_blk_set_status(vdev, 0);
vhost_dev_cleanup(&s->dev);
+ vhost_dev_free_inflight(s->inflight);
g_free(vqs);
+ g_free(s->inflight);
virtio_cleanup(vdev);
-
- if (s->vhost_user) {
- vhost_user_cleanup(s->vhost_user);
- g_free(s->vhost_user);
- s->vhost_user = NULL;
- }
+ vhost_user_cleanup(&s->vhost_user);
}
static void vhost_user_blk_instance_init(Object *obj)
@@ -386,6 +399,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data)
vdc->set_config = vhost_user_blk_set_config;
vdc->get_features = vhost_user_blk_get_features;
vdc->set_status = vhost_user_blk_set_status;
+ vdc->reset = vhost_user_blk_reset;
}
static const TypeInfo vhost_user_blk_info = {
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ee22e754f0..b90de6c664 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -37,6 +37,27 @@
#include "kvm_i386.h"
#include "trace.h"
+/* context entry operations */
+#define VTD_CE_GET_RID2PASID(ce) \
+ ((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
+#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
+ ((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+
+/* pe operations */
+#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
+#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW))
+#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\
+ if (ret_fr) { \
+ ret_fr = -ret_fr; \
+ if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \
+ trace_vtd_fault_disabled(); \
+ } else { \
+ vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \
+ } \
+ goto error; \
+ } \
+}
+
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
@@ -512,9 +533,15 @@ static void vtd_generate_completion_event(IntelIOMMUState *s)
}
}
-static inline bool vtd_root_entry_present(VTDRootEntry *root)
+static inline bool vtd_root_entry_present(IntelIOMMUState *s,
+ VTDRootEntry *re,
+ uint8_t devfn)
{
- return root->val & VTD_ROOT_ENTRY_P;
+ if (s->root_scalable && devfn > UINT8_MAX / 2) {
+ return re->hi & VTD_ROOT_ENTRY_P;
+ }
+
+ return re->lo & VTD_ROOT_ENTRY_P;
}
static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
@@ -524,10 +551,11 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
addr = s->root + index * sizeof(*re);
if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
- re->val = 0;
+ re->lo = 0;
return -VTD_FR_ROOT_TABLE_INV;
}
- re->val = le64_to_cpu(re->val);
+ re->lo = le64_to_cpu(re->lo);
+ re->hi = le64_to_cpu(re->hi);
return 0;
}
@@ -536,18 +564,35 @@ static inline bool vtd_ce_present(VTDContextEntry *context)
return context->lo & VTD_CONTEXT_ENTRY_P;
}
-static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
+static int vtd_get_context_entry_from_root(IntelIOMMUState *s,
+ VTDRootEntry *re,
+ uint8_t index,
VTDContextEntry *ce)
{
- dma_addr_t addr;
+ dma_addr_t addr, ce_size;
/* we have checked that root entry is present */
- addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
- if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
+ ce_size = s->root_scalable ? VTD_CTX_ENTRY_SCALABLE_SIZE :
+ VTD_CTX_ENTRY_LEGACY_SIZE;
+
+ if (s->root_scalable && index > UINT8_MAX / 2) {
+ index = index & (~VTD_DEVFN_CHECK_MASK);
+ addr = re->hi & VTD_ROOT_ENTRY_CTP;
+ } else {
+ addr = re->lo & VTD_ROOT_ENTRY_CTP;
+ }
+
+ addr = addr + index * ce_size;
+ if (dma_memory_read(&address_space_memory, addr, ce, ce_size)) {
return -VTD_FR_CONTEXT_TABLE_INV;
}
+
ce->lo = le64_to_cpu(ce->lo);
ce->hi = le64_to_cpu(ce->hi);
+ if (ce_size == VTD_CTX_ENTRY_SCALABLE_SIZE) {
+ ce->val[2] = le64_to_cpu(ce->val[2]);
+ ce->val[3] = le64_to_cpu(ce->val[3]);
+ }
return 0;
}
@@ -600,6 +645,144 @@ static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
(1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
}
+/* Return true if check passed, otherwise false */
+static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu,
+ VTDPASIDEntry *pe)
+{
+ switch (VTD_PE_GET_TYPE(pe)) {
+ case VTD_SM_PASID_ENTRY_FLT:
+ case VTD_SM_PASID_ENTRY_SLT:
+ case VTD_SM_PASID_ENTRY_NESTED:
+ break;
+ case VTD_SM_PASID_ENTRY_PT:
+ if (!x86_iommu->pt_supported) {
+ return false;
+ }
+ break;
+ default:
+ /* Unknown type */
+ return false;
+ }
+ return true;
+}
+
+static int vtd_get_pasid_dire(dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDDirEntry *pdire)
+{
+ uint32_t index;
+ dma_addr_t addr, entry_size;
+
+ index = VTD_PASID_DIR_INDEX(pasid);
+ entry_size = VTD_PASID_DIR_ENTRY_SIZE;
+ addr = pasid_dir_base + index * entry_size;
+ if (dma_memory_read(&address_space_memory, addr, pdire, entry_size)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ return 0;
+}
+
+static int vtd_get_pasid_entry(IntelIOMMUState *s,
+ uint32_t pasid,
+ VTDPASIDDirEntry *pdire,
+ VTDPASIDEntry *pe)
+{
+ uint32_t index;
+ dma_addr_t addr, entry_size;
+ X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+ index = VTD_PASID_TABLE_INDEX(pasid);
+ entry_size = VTD_PASID_ENTRY_SIZE;
+ addr = pdire->val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+ addr = addr + index * entry_size;
+ if (dma_memory_read(&address_space_memory, addr, pe, entry_size)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ /* Do translation type check */
+ if (!vtd_pe_type_check(x86_iommu, pe)) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) {
+ return -VTD_FR_PASID_TABLE_INV;
+ }
+
+ return 0;
+}
+
+static int vtd_get_pasid_entry_from_pasid(IntelIOMMUState *s,
+ dma_addr_t pasid_dir_base,
+ uint32_t pasid,
+ VTDPASIDEntry *pe)
+{
+ int ret;
+ VTDPASIDDirEntry pdire;
+
+ ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ if (ret) {
+ return ret;
+ }
+
+ ret = vtd_get_pasid_entry(s, pasid, &pdire, pe);
+ if (ret) {
+ return ret;
+ }
+
+ return ret;
+}
+
+static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s,
+ VTDContextEntry *ce,
+ VTDPASIDEntry *pe)
+{
+ uint32_t pasid;
+ dma_addr_t pasid_dir_base;
+ int ret = 0;
+
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
+ ret = vtd_get_pasid_entry_from_pasid(s, pasid_dir_base, pasid, pe);
+
+ return ret;
+}
+
+static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s,
+ VTDContextEntry *ce,
+ bool *pe_fpd_set)
+{
+ int ret;
+ uint32_t pasid;
+ dma_addr_t pasid_dir_base;
+ VTDPASIDDirEntry pdire;
+ VTDPASIDEntry pe;
+
+ pasid = VTD_CE_GET_RID2PASID(ce);
+ pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce);
+
+ ret = vtd_get_pasid_dire(pasid_dir_base, pasid, &pdire);
+ if (ret) {
+ return ret;
+ }
+
+ if (pdire.val & VTD_PASID_DIR_FPD) {
+ *pe_fpd_set = true;
+ return 0;
+ }
+
+ ret = vtd_get_pasid_entry(s, pasid, &pdire, &pe);
+ if (ret) {
+ return ret;
+ }
+
+ if (pe.val[0] & VTD_PASID_ENTRY_FPD) {
+ *pe_fpd_set = true;
+ }
+
+ return 0;
+}
+
/* Get the page-table level that hardware should use for the second-level
* page-table walk from the Address Width field of context-entry.
*/
@@ -608,17 +791,43 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
}
+static uint32_t vtd_get_iova_level(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ VTDPASIDEntry pe;
+
+ if (s->root_scalable) {
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return VTD_PE_GET_LEVEL(&pe);
+ }
+
+ return vtd_ce_get_level(ce);
+}
+
static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
{
return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
}
+static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ VTDPASIDEntry pe;
+
+ if (s->root_scalable) {
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9;
+ }
+
+ return vtd_ce_get_agaw(ce);
+}
+
static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce)
{
return ce->lo & VTD_CONTEXT_ENTRY_TT;
}
-/* Return true if check passed, otherwise false */
+/* Only for Legacy Mode. Return true if check passed, otherwise false */
static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
VTDContextEntry *ce)
{
@@ -639,7 +848,7 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
}
break;
default:
- /* Unknwon type */
+ /* Unknown type */
error_report_once("%s: unknown ce type: %"PRIu32, __func__,
vtd_ce_get_type(ce));
return false;
@@ -647,21 +856,36 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
return true;
}
-static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
+static inline uint64_t vtd_iova_limit(IntelIOMMUState *s,
+ VTDContextEntry *ce, uint8_t aw)
{
- uint32_t ce_agaw = vtd_ce_get_agaw(ce);
+ uint32_t ce_agaw = vtd_get_iova_agaw(s, ce);
return 1ULL << MIN(ce_agaw, aw);
}
/* Return true if IOVA passes range check, otherwise false. */
-static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
+static inline bool vtd_iova_range_check(IntelIOMMUState *s,
+ uint64_t iova, VTDContextEntry *ce,
uint8_t aw)
{
/*
* Check if @iova is above 2^X-1, where X is the minimum of MGAW
* in CAP_REG and AW in context-entry.
*/
- return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
+ return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1));
+}
+
+static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ VTDPASIDEntry pe;
+
+ if (s->root_scalable) {
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR;
+ }
+
+ return vtd_ce_get_slpt_base(ce);
}
/*
@@ -707,17 +931,18 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
/* Given the @iova, get relevant @slptep. @slpte_level will be the last level
* of the translation, can be used for deciding the size of large page.
*/
-static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
+static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint64_t iova, bool is_write,
uint64_t *slptep, uint32_t *slpte_level,
bool *reads, bool *writes, uint8_t aw_bits)
{
- dma_addr_t addr = vtd_ce_get_slpt_base(ce);
- uint32_t level = vtd_ce_get_level(ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
+ uint32_t level = vtd_get_iova_level(s, ce);
uint32_t offset;
uint64_t slpte;
uint64_t access_right_check;
- if (!vtd_iova_range_check(iova, ce, aw_bits)) {
+ if (!vtd_iova_range_check(s, iova, ce, aw_bits)) {
error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")",
__func__, iova);
return -VTD_FR_ADDR_BEYOND_MGAW;
@@ -733,7 +958,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
if (slpte == (uint64_t)-1) {
error_report_once("%s: detected read error on DMAR slpte "
"(iova=0x%" PRIx64 ")", __func__, iova);
- if (level == vtd_ce_get_level(ce)) {
+ if (level == vtd_get_iova_level(s, ce)) {
/* Invalid programming of context-entry */
return -VTD_FR_CONTEXT_ENTRY_INV;
} else {
@@ -962,29 +1187,96 @@ next:
/**
* vtd_page_walk - walk specific IOVA range, and call the hook
*
+ * @s: intel iommu state
* @ce: context entry to walk upon
* @start: IOVA address to start the walk
* @end: IOVA range end address (start <= addr < end)
* @info: page walking information struct
*/
-static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
+static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce,
+ uint64_t start, uint64_t end,
vtd_page_walk_info *info)
{
- dma_addr_t addr = vtd_ce_get_slpt_base(ce);
- uint32_t level = vtd_ce_get_level(ce);
+ dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce);
+ uint32_t level = vtd_get_iova_level(s, ce);
- if (!vtd_iova_range_check(start, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, start, ce, info->aw)) {
return -VTD_FR_ADDR_BEYOND_MGAW;
}
- if (!vtd_iova_range_check(end, ce, info->aw)) {
+ if (!vtd_iova_range_check(s, end, ce, info->aw)) {
/* Fix end so that it reaches the maximum */
- end = vtd_iova_limit(ce, info->aw);
+ end = vtd_iova_limit(s, ce, info->aw);
}
return vtd_page_walk_level(addr, start, end, level, true, true, info);
}
+static int vtd_root_entry_rsvd_bits_check(IntelIOMMUState *s,
+ VTDRootEntry *re)
+{
+ /* Legacy Mode reserved bits check */
+ if (!s->root_scalable &&
+ (re->hi || (re->lo & VTD_ROOT_ENTRY_RSVD(s->aw_bits))))
+ goto rsvd_err;
+
+ /* Scalable Mode reserved bits check */
+ if (s->root_scalable &&
+ ((re->lo & VTD_ROOT_ENTRY_RSVD(s->aw_bits)) ||
+ (re->hi & VTD_ROOT_ENTRY_RSVD(s->aw_bits))))
+ goto rsvd_err;
+
+ return 0;
+
+rsvd_err:
+ error_report_once("%s: invalid root entry: hi=0x%"PRIx64
+ ", lo=0x%"PRIx64,
+ __func__, re->hi, re->lo);
+ return -VTD_FR_ROOT_ENTRY_RSVD;
+}
+
+static inline int vtd_context_entry_rsvd_bits_check(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ if (!s->root_scalable &&
+ (ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI ||
+ ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
+ error_report_once("%s: invalid context entry: hi=%"PRIx64
+ ", lo=%"PRIx64" (reserved nonzero)",
+ __func__, ce->hi, ce->lo);
+ return -VTD_FR_CONTEXT_ENTRY_RSVD;
+ }
+
+ if (s->root_scalable &&
+ (ce->val[0] & VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(s->aw_bits) ||
+ ce->val[1] & VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 ||
+ ce->val[2] ||
+ ce->val[3])) {
+ error_report_once("%s: invalid context entry: val[3]=%"PRIx64
+ ", val[2]=%"PRIx64
+ ", val[1]=%"PRIx64
+ ", val[0]=%"PRIx64" (reserved nonzero)",
+ __func__, ce->val[3], ce->val[2],
+ ce->val[1], ce->val[0]);
+ return -VTD_FR_CONTEXT_ENTRY_RSVD;
+ }
+
+ return 0;
+}
+
+static int vtd_ce_rid2pasid_check(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ VTDPASIDEntry pe;
+
+ /*
+ * Make sure that, in Scalable Mode, a present context entry
+ * has a valid rid2pasid setting, which includes a valid
+ * rid2pasid field and a corresponding pasid entry setting
+ */
+ return vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+}
+
/* Map a device to its corresponding domain (context-entry) */
static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
uint8_t devfn, VTDContextEntry *ce)
@@ -998,20 +1290,18 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
return ret_fr;
}
- if (!vtd_root_entry_present(&re)) {
+ if (!vtd_root_entry_present(s, &re, devfn)) {
/* Not error - it's okay we don't have root entry. */
trace_vtd_re_not_present(bus_num);
return -VTD_FR_ROOT_ENTRY_P;
}
- if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
- error_report_once("%s: invalid root entry: rsvd=0x%"PRIx64
- ", val=0x%"PRIx64" (reserved nonzero)",
- __func__, re.rsvd, re.val);
- return -VTD_FR_ROOT_ENTRY_RSVD;
+ ret_fr = vtd_root_entry_rsvd_bits_check(s, &re);
+ if (ret_fr) {
+ return ret_fr;
}
- ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
+ ret_fr = vtd_get_context_entry_from_root(s, &re, devfn, ce);
if (ret_fr) {
return ret_fr;
}
@@ -1022,26 +1312,38 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
return -VTD_FR_CONTEXT_ENTRY_P;
}
- if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
- (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
- error_report_once("%s: invalid context entry: hi=%"PRIx64
- ", lo=%"PRIx64" (reserved nonzero)",
- __func__, ce->hi, ce->lo);
- return -VTD_FR_CONTEXT_ENTRY_RSVD;
+ ret_fr = vtd_context_entry_rsvd_bits_check(s, ce);
+ if (ret_fr) {
+ return ret_fr;
}
/* Check if the programming of context-entry is valid */
- if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
+ if (!s->root_scalable &&
+ !vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
error_report_once("%s: invalid context entry: hi=%"PRIx64
", lo=%"PRIx64" (level %d not supported)",
- __func__, ce->hi, ce->lo, vtd_ce_get_level(ce));
+ __func__, ce->hi, ce->lo,
+ vtd_ce_get_level(ce));
return -VTD_FR_CONTEXT_ENTRY_INV;
}
- /* Do translation type check */
- if (!vtd_ce_type_check(x86_iommu, ce)) {
- /* Errors dumped in vtd_ce_type_check() */
- return -VTD_FR_CONTEXT_ENTRY_INV;
+ if (!s->root_scalable) {
+ /* Do translation type check */
+ if (!vtd_ce_type_check(x86_iommu, ce)) {
+ /* Errors dumped in vtd_ce_type_check() */
+ return -VTD_FR_CONTEXT_ENTRY_INV;
+ }
+ } else {
+ /*
+ * Check if the programming of context-entry.rid2pasid
+ * and the corresponding pasid setting is valid, which
+ * avoids having to check the pasid entry fetching result
+ * in later helper function calls.
+ */
+ ret_fr = vtd_ce_rid2pasid_check(s, ce);
+ if (ret_fr) {
+ return ret_fr;
+ }
}
return 0;
@@ -1054,6 +1356,19 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
return 0;
}
+static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
+ VTDContextEntry *ce)
+{
+ VTDPASIDEntry pe;
+
+ if (s->root_scalable) {
+ vtd_ce_get_rid2pasid_entry(s, ce, &pe);
+ return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+ }
+
+ return VTD_CONTEXT_ENTRY_DID(ce->hi);
+}
+
static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
VTDContextEntry *ce,
hwaddr addr, hwaddr size)
@@ -1065,10 +1380,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
.notify_unmap = true,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = VTD_CONTEXT_ENTRY_DID(ce->hi),
+ .domain_id = vtd_get_domain_id(s, ce),
};
- return vtd_page_walk(ce, addr, addr + size, &info);
+ return vtd_page_walk(s, ce, addr, addr + size, &info);
}
static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
@@ -1103,35 +1418,24 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
}
/*
- * Fetch translation type for specific device. Returns <0 if error
- * happens, otherwise return the shifted type to check against
- * VTD_CONTEXT_TT_*.
+ * Check if a specific device is configured to bypass address
+ * translation for DMA requests. In Scalable Mode, whether
+ * 1st-level or 2nd-level translation is bypassed depends
+ * on the PGTT setting.
*/
-static int vtd_dev_get_trans_type(VTDAddressSpace *as)
+static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
{
IntelIOMMUState *s;
VTDContextEntry ce;
+ VTDPASIDEntry pe;
int ret;
- s = as->iommu_state;
+ assert(as);
+ s = as->iommu_state;
ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
as->devfn, &ce);
if (ret) {
- return ret;
- }
-
- return vtd_ce_get_type(&ce);
-}
-
-static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
-{
- int ret;
-
- assert(as);
-
- ret = vtd_dev_get_trans_type(as);
- if (ret < 0) {
/*
* Possibly failed to parse the context entry for some reason
* (e.g., during init, or any guest configuration errors on
@@ -1141,7 +1445,17 @@ static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
return false;
}
- return ret == VTD_CONTEXT_TT_PASS_THROUGH;
+ if (s->root_scalable) {
+ ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe);
+ if (ret) {
+ error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
+ __func__, ret);
+ return false;
+ }
+ return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT);
+ }
+
+ return (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH);
}
/* Return whether the device is using IOMMU translation. */
@@ -1221,6 +1535,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_ROOT_ENTRY_RSVD] = false,
[VTD_FR_PAGING_ENTRY_RSVD] = true,
[VTD_FR_CONTEXT_ENTRY_TT] = true,
+ [VTD_FR_PASID_TABLE_INV] = false,
[VTD_FR_RESERVED_ERR] = false,
[VTD_FR_MAX] = false,
};
@@ -1322,18 +1637,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
cc_entry->context_cache_gen);
ce = cc_entry->context_entry;
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ if (!is_fpd_set && s->root_scalable) {
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
+ VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
+ }
} else {
ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
- if (ret_fr) {
- ret_fr = -ret_fr;
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- trace_vtd_fault_disabled();
- } else {
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
- }
- goto error;
+ if (!ret_fr && !is_fpd_set && s->root_scalable) {
+ ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set);
}
+ VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
/* Update context-cache */
trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
cc_entry->context_cache_gen,
@@ -1367,21 +1681,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
return true;
}
- ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
+ ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level,
&reads, &writes, s->aw_bits);
- if (ret_fr) {
- ret_fr = -ret_fr;
- if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
- trace_vtd_fault_disabled();
- } else {
- vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
- }
- goto error;
- }
+ VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write);
page_mask = vtd_slpt_level_page_mask(level);
access_flags = IOMMU_ACCESS_FLAG(reads, writes);
- vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
+ vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte,
access_flags, level);
out:
vtd_iommu_unlock(s);
@@ -1404,6 +1710,9 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
{
s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
s->root_extended = s->root & VTD_RTADDR_RTT;
+ if (s->scalable_mode) {
+ s->root_scalable = s->root & VTD_RTADDR_SMT;
+ }
s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);
trace_vtd_reg_dmar_root(s->root, s->root_extended);
@@ -1573,7 +1882,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce) &&
- domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+ domain_id == vtd_get_domain_id(s, &ce)) {
vtd_sync_shadow_page_table(vtd_as);
}
}
@@ -1591,7 +1900,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
vtd_as->devfn, &ce);
- if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
+ if (!ret && domain_id == vtd_get_domain_id(s, &ce)) {
if (vtd_as_has_map_notifier(vtd_as)) {
/*
* As long as we have MAP notifications registered in
@@ -1699,7 +2008,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
if (en) {
s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
/* 2^(x+8) entries */
- s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
+ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8 - (s->iq_dw ? 1 : 0));
s->qi_enabled = true;
trace_vtd_inv_qi_setup(s->iq, s->iq_size);
/* Ok - report back to driver */
@@ -1866,19 +2175,24 @@ static void vtd_handle_iotlb_write(IntelIOMMUState *s)
}
/* Fetch an Invalidation Descriptor from the Invalidation Queue */
-static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
+static bool vtd_get_inv_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
- dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
- if (dma_memory_read(&address_space_memory, addr, inv_desc,
- sizeof(*inv_desc))) {
- error_report_once("Read INV DESC failed");
- inv_desc->lo = 0;
- inv_desc->hi = 0;
+ dma_addr_t base_addr = s->iq;
+ uint32_t offset = s->iq_head;
+ uint32_t dw = s->iq_dw ? 32 : 16;
+ dma_addr_t addr = base_addr + offset * dw;
+
+ if (dma_memory_read(&address_space_memory, addr, inv_desc, dw)) {
+ error_report_once("Read INV DESC failed.");
return false;
}
inv_desc->lo = le64_to_cpu(inv_desc->lo);
inv_desc->hi = le64_to_cpu(inv_desc->hi);
+ if (dw == 32) {
+ inv_desc->val[2] = le64_to_cpu(inv_desc->val[2]);
+ inv_desc->val[3] = le64_to_cpu(inv_desc->val[3]);
+ }
return true;
}
@@ -2084,10 +2398,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
uint8_t desc_type;
trace_vtd_inv_qi_head(s->iq_head);
- if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
+ if (!vtd_get_inv_desc(s, &inv_desc)) {
s->iq_last_desc_type = VTD_INV_DESC_NONE;
return false;
}
+
desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
/* FIXME: should update at first or at last? */
s->iq_last_desc_type = desc_type;
@@ -2107,6 +2422,17 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ /*
+ * TODO: the bodies of the two cases below will be implemented in a future
+ * series. To make a guest (whose iommu driver integrates the scalable mode
+ * support patch set) work, just returning true is enough for now.
+ */
+ case VTD_INV_DESC_PC:
+ break;
+
+ case VTD_INV_DESC_PIOTLB:
+ break;
+
case VTD_INV_DESC_WAIT:
trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
if (!vtd_process_wait_desc(s, &inv_desc)) {
@@ -2172,7 +2498,12 @@ static void vtd_handle_iqt_write(IntelIOMMUState *s)
{
uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);
- s->iq_tail = VTD_IQT_QT(val);
+ if (s->iq_dw && (val & VTD_IQT_QT_256_RSV_BIT)) {
+ error_report_once("%s: RSV bit is set: val=0x%"PRIx64,
+ __func__, val);
+ return;
+ }
+ s->iq_tail = VTD_IQT_QT(s->iq_dw, val);
trace_vtd_inv_qi_tail(s->iq_tail);
if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
@@ -2441,6 +2772,12 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
} else {
vtd_set_quad(s, addr, val);
}
+ if (s->ecap & VTD_ECAP_SMTS &&
+ val & VTD_IQA_DW_MASK) {
+ s->iq_dw = true;
+ } else {
+ s->iq_dw = false;
+ }
break;
case DMAR_IQA_REG_HI:
@@ -2629,6 +2966,7 @@ static const VMStateDescription vtd_vmstate = {
VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
VMSTATE_BOOL(root_extended, IntelIOMMUState),
+ VMSTATE_BOOL(root_scalable, IntelIOMMUState),
VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
@@ -2659,6 +2997,7 @@ static Property vtd_properties[] = {
DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits,
VTD_HOST_ADDRESS_WIDTH),
DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
+ DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -3098,9 +3437,11 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
vtd_address_space_unmap(vtd_as, n);
if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
- trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
+ trace_vtd_replay_ce_valid(s->root_scalable ? "scalable mode" :
+ "legacy mode",
+ bus_n, PCI_SLOT(vtd_as->devfn),
PCI_FUNC(vtd_as->devfn),
- VTD_CONTEXT_ENTRY_DID(ce.hi),
+ vtd_get_domain_id(s, &ce),
ce.hi, ce.lo);
if (vtd_as_has_map_notifier(vtd_as)) {
/* This is required only for MAP typed notifiers */
@@ -3110,10 +3451,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
.notify_unmap = false,
.aw = s->aw_bits,
.as = vtd_as,
- .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
+ .domain_id = vtd_get_domain_id(s, &ce),
};
- vtd_page_walk(&ce, 0, ~0ULL, &info);
+ vtd_page_walk(s, &ce, 0, ~0ULL, &info);
}
} else {
trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
@@ -3137,6 +3478,7 @@ static void vtd_init(IntelIOMMUState *s)
s->root = 0;
s->root_extended = false;
+ s->root_scalable = false;
s->dmar_enabled = false;
s->intr_enabled = false;
s->iq_head = 0;
@@ -3145,6 +3487,7 @@ static void vtd_init(IntelIOMMUState *s)
s->iq_size = 0;
s->qi_enabled = false;
s->iq_last_desc_type = VTD_INV_DESC_NONE;
+ s->iq_dw = false;
s->next_frcd_reg = 0;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
@@ -3190,6 +3533,11 @@ static void vtd_init(IntelIOMMUState *s)
s->cap |= VTD_CAP_CM;
}
+ /* TODO: read cap/ecap from host to decide which cap to be exposed. */
+ if (s->scalable_mode) {
+ s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS;
+ }
+
vtd_reset_caches(s);
/* Define registers with default values and bit semantics */
@@ -3199,7 +3547,7 @@ static void vtd_init(IntelIOMMUState *s)
vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
- vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
+ vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffffc00ULL, 0);
vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);
@@ -3222,7 +3570,7 @@ static void vtd_init(IntelIOMMUState *s)
vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
- vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
+ vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff807ULL, 0);
vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
@@ -3301,6 +3649,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return false;
}
+ if (s->scalable_mode && !s->dma_drain) {
+ error_setg(errp, "Need to set dma_drain for scalable mode");
+ return false;
+ }
+
return true;
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 00e9edbc66..1160618177 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -172,6 +172,7 @@
/* RTADDR_REG */
#define VTD_RTADDR_RTT (1ULL << 11)
+#define VTD_RTADDR_SMT (1ULL << 10)
#define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
/* IRTA_REG */
@@ -189,6 +190,9 @@
#define VTD_ECAP_EIM (1ULL << 4)
#define VTD_ECAP_PT (1ULL << 6)
#define VTD_ECAP_MHMV (15ULL << 20)
+#define VTD_ECAP_SRS (1ULL << 31)
+#define VTD_ECAP_SMTS (1ULL << 43)
+#define VTD_ECAP_SLTS (1ULL << 46)
/* CAP_REG */
/* (offset >> 4) << 24 */
@@ -217,11 +221,14 @@
#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT)
/* IQT_REG */
-#define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
+#define VTD_IQT_QT(dw_bit, val) (dw_bit ? (((val) >> 5) & 0x3fffULL) : \
+ (((val) >> 4) & 0x7fffULL))
+#define VTD_IQT_QT_256_RSV_BIT 0x10
/* IQA_REG */
#define VTD_IQA_IQA_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL)
#define VTD_IQA_QS 0x7ULL
+#define VTD_IQA_DW_MASK 0x800
/* IQH_REG */
#define VTD_IQH_QH_SHIFT 4
@@ -294,6 +301,8 @@ typedef enum VTDFaultReason {
* request while disabled */
VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
+ VTD_FR_PASID_TABLE_INV = 0x58, /* Invalid PASID table entry */
+
/* This is not a normal fault reason. We use this to indicate some faults
* that are not referenced by the VT-d specification.
* Fault event with such reason should not be recorded.
@@ -321,6 +330,9 @@ union VTDInvDesc {
uint64_t lo;
uint64_t hi;
};
+ struct {
+ uint64_t val[4];
+ };
union {
VTDInvDescIEC iec;
};
@@ -335,6 +347,8 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_IEC 0x4 /* Interrupt Entry Cache
Invalidate Descriptor */
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
+#define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
+#define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
/* Masks for Invalidation Wait Descriptor*/
@@ -411,8 +425,8 @@ typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
#define VTD_PAGE_MASK_1G (~((1ULL << VTD_PAGE_SHIFT_1G) - 1))
struct VTDRootEntry {
- uint64_t val;
- uint64_t rsvd;
+ uint64_t lo;
+ uint64_t hi;
};
typedef struct VTDRootEntry VTDRootEntry;
@@ -423,6 +437,8 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry))
#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw))
+#define VTD_DEVFN_CHECK_MASK 0x80
+
/* Masks for struct VTDContextEntry */
/* lo */
#define VTD_CONTEXT_ENTRY_P (1ULL << 0)
@@ -441,6 +457,38 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CONTEXT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDContextEntry))
+#define VTD_CTX_ENTRY_LEGACY_SIZE 16
+#define VTD_CTX_ENTRY_SCALABLE_SIZE 32
+
+#define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
+#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
+#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
+
+/* PASID Table Related Definitions */
+#define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL)
+#define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
+#define VTD_PASID_DIR_ENTRY_SIZE 8
+#define VTD_PASID_ENTRY_SIZE 64
+#define VTD_PASID_DIR_BITS_MASK (0x3fffULL)
+#define VTD_PASID_DIR_INDEX(pasid) (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
+#define VTD_PASID_DIR_FPD (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
+#define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
+#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
+
+/* PASID Granular Translation Type Mask */
+#define VTD_SM_PASID_ENTRY_PGTT (7ULL << 6)
+#define VTD_SM_PASID_ENTRY_FLT (1ULL << 6)
+#define VTD_SM_PASID_ENTRY_SLT (2ULL << 6)
+#define VTD_SM_PASID_ENTRY_NESTED (3ULL << 6)
+#define VTD_SM_PASID_ENTRY_PT (4ULL << 6)
+
+#define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */
+#define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK)
+
+/* Second Level Page Translation Pointer */
+#define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL)
+
/* Paging Structure common */
#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7)
/* Bits to decide the offset for each level */
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1cdaff5f4d..6077d27361 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2090,6 +2090,8 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
return;
}
+ hotplug_handler_pre_plug(pcms->acpi_dev, dev, errp);
+
if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
return;
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 77244fc384..cae1b76fde 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -30,7 +30,7 @@ vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32
vtd_iotlb_cc_update(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen1, uint32_t gen2) "IOTLB context update bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32" -> gen %"PRIu32
vtd_iotlb_reset(const char *reason) "IOTLB reset (reason: %s)"
vtd_fault_disabled(void) "Fault processing disabled for context entry"
-vtd_replay_ce_valid(uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
+vtd_replay_ce_valid(const char *mode, uint8_t bus, uint8_t dev, uint8_t fn, uint16_t domain, uint64_t hi, uint64_t lo) "%s: replay valid context device %02"PRIx8":%02"PRIx8".%02"PRIx8" domain 0x%"PRIx16" hi 0x%"PRIx64" lo 0x%"PRIx64
vtd_replay_ce_invalid(uint8_t bus, uint8_t dev, uint8_t fn) "replay invalid context device %02"PRIx8":%02"PRIx8".%02"PRIx8
vtd_page_walk_level(uint64_t addr, uint32_t level, uint64_t start, uint64_t end) "walk (base=0x%"PRIx64", level=%"PRIu32") iova range 0x%"PRIx64" - 0x%"PRIx64
vtd_page_walk_one(uint16_t domain, uint64_t iova, uint64_t gpa, uint64_t mask, int perm) "domain 0x%"PRIu16" iova 0x%"PRIx64" -> gpa 0x%"PRIx64" mask 0x%"PRIx64" perm %d"
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index e692b9fdc1..ac44aa53be 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -805,6 +805,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
* pc_q35_init()
*/
dc->user_creatable = false;
+ hc->pre_plug = ich9_pm_device_pre_plug_cb;
hc->plug = ich9_pm_device_plug_cb;
hc->unplug_request = ich9_pm_device_unplug_request_cb;
hc->unplug = ich9_pm_device_unplug_cb;
diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c
index 9766edb445..26bda73eae 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -20,6 +20,9 @@
OBJECT_CHECK(GenPCIERootPort, (obj), TYPE_GEN_PCIE_ROOT_PORT)
#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100
+#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
+ (GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
+
#define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR 1
typedef struct GenPCIERootPort {
@@ -149,6 +152,7 @@ static void gen_rp_dev_class_init(ObjectClass *klass, void *data)
rpc->interrupts_init = gen_rp_interrupts_init;
rpc->interrupts_uninit = gen_rp_interrupts_uninit;
rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
+ rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
}
static const TypeInfo gen_rp_dev_info = {
diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index 34ad76743c..e94d918b6d 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -47,6 +47,7 @@ static void rp_reset(DeviceState *qdev)
pcie_cap_deverr_reset(d);
pcie_cap_slot_reset(d);
pcie_cap_arifwd_reset(d);
+ pcie_acs_reset(d);
pcie_aer_root_reset(d);
pci_bridge_reset(qdev);
pci_bridge_disable_base_limit(d);
@@ -106,6 +107,9 @@ static void rp_realize(PCIDevice *d, Error **errp)
pcie_aer_root_init(d);
rp_aer_vector_update(d);
+ if (rpc->acs_offset) {
+ pcie_acs_init(d, rpc->acs_offset);
+ }
return;
err:
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 640f678773..cf1ca30f93 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -914,3 +914,41 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset)
pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
}
+
+/* ACS (Access Control Services) */
+void pcie_acs_init(PCIDevice *dev, uint16_t offset)
+{
+ bool is_downstream = pci_is_express_downstream_port(dev);
+ uint16_t cap_bits = 0;
+
+ /* For endpoints, only multifunction devs may have an ACS capability: */
+ assert(is_downstream ||
+ (dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) ||
+ PCI_FUNC(dev->devfn));
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_ACS, PCI_ACS_VER, offset,
+ PCI_ACS_SIZEOF);
+ dev->exp.acs_cap = offset;
+
+ if (is_downstream) {
+ /*
+ * Downstream ports must implement SV, TB, RR, CR, UF, and DT (with
+ * caveats on the latter four that we ignore for simplicity).
+ * Endpoints may also implement a subset of ACS capabilities,
+ * but these are optional if the endpoint does not support
+ * peer-to-peer between functions and thus omitted here.
+ */
+ cap_bits = PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
+ PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT;
+ }
+
+ pci_set_word(dev->config + offset + PCI_ACS_CAP, cap_bits);
+ pci_set_word(dev->wmask + offset + PCI_ACS_CTRL, cap_bits);
+}
+
+void pcie_acs_reset(PCIDevice *dev)
+{
+ if (dev->exp.acs_cap) {
+ pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
+ }
+}
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 6728878a52..8b1e6876db 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -69,7 +69,6 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
VHostUserSCSI *s = VHOST_USER_SCSI(dev);
VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
- VhostUserState *user;
Error *err = NULL;
int ret;
@@ -86,30 +85,24 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
return;
}
- user = vhost_user_init();
- if (!user) {
- error_setg(errp, "vhost-user-scsi: failed to init vhost_user");
+ if (!vhost_user_init(&s->vhost_user, &vs->conf.chardev, errp)) {
return;
}
- user->chr = &vs->conf.chardev;
vsc->dev.nvqs = 2 + vs->conf.num_queues;
vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs);
vsc->dev.vq_index = 0;
vsc->dev.backend_features = 0;
- ret = vhost_dev_init(&vsc->dev, user,
+ ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
VHOST_BACKEND_TYPE_USER, 0);
if (ret < 0) {
error_setg(errp, "vhost-user-scsi: vhost initialization failed: %s",
strerror(-ret));
- vhost_user_cleanup(user);
- g_free(user);
+ vhost_user_cleanup(&s->vhost_user);
return;
}
- s->vhost_user = user;
-
/* Channel and lun both are 0 for bootable vhost-user-scsi disk */
vsc->channel = 0;
vsc->lun = 0;
@@ -130,12 +123,7 @@ static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp)
g_free(vqs);
virtio_scsi_common_unrealize(dev, errp);
-
- if (s->vhost_user) {
- vhost_user_cleanup(s->vhost_user);
- g_free(s->vhost_user);
- s->vhost_user = NULL;
- }
+ vhost_user_cleanup(&s->vhost_user);
}
static Property vhost_user_scsi_properties[] = {
diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c
index 049089b5e2..c175148fce 100644
--- a/hw/virtio/vhost-stub.c
+++ b/hw/virtio/vhost-stub.c
@@ -7,9 +7,9 @@ bool vhost_has_free_slot(void)
return true;
}
-VhostUserState *vhost_user_init(void)
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
- return NULL;
+ return false;
}
void vhost_user_cleanup(VhostUserState *user)
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 0d6c64e5ca..553319c7ac 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -56,6 +56,7 @@ enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_CONFIG = 9,
VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
VHOST_USER_PROTOCOL_F_MAX
};
@@ -93,6 +94,8 @@ typedef enum VhostUserRequest {
VHOST_USER_POSTCOPY_ADVISE = 28,
VHOST_USER_POSTCOPY_LISTEN = 29,
VHOST_USER_POSTCOPY_END = 30,
+ VHOST_USER_GET_INFLIGHT_FD = 31,
+ VHOST_USER_SET_INFLIGHT_FD = 32,
VHOST_USER_MAX
} VhostUserRequest;
@@ -151,6 +154,13 @@ typedef struct VhostUserVringArea {
uint64_t offset;
} VhostUserVringArea;
+typedef struct VhostUserInflight {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+ uint16_t num_queues;
+ uint16_t queue_size;
+} VhostUserInflight;
+
typedef struct {
VhostUserRequest request;
@@ -173,6 +183,7 @@ typedef union {
VhostUserConfig config;
VhostUserCryptoSession session;
VhostUserVringArea area;
+ VhostUserInflight inflight;
} VhostUserPayload;
typedef struct VhostUserMsg {
@@ -214,7 +225,7 @@ static bool ioeventfd_enabled(void)
return !kvm_enabled() || kvm_eventfds_enabled();
}
-static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
+static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
struct vhost_user *u = dev->opaque;
CharBackend *chr = u->user->chr;
@@ -225,7 +236,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
if (r != size) {
error_report("Failed to read msg header. Read %d instead of %d."
" Original request %d.", r, size, msg->hdr.request);
- goto fail;
+ return -1;
}
/* validate received flags */
@@ -233,7 +244,21 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
error_report("Failed to read msg header."
" Flags 0x%x instead of 0x%x.", msg->hdr.flags,
VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
- goto fail;
+ return -1;
+ }
+
+ return 0;
+}
+
+static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
+{
+ struct vhost_user *u = dev->opaque;
+ CharBackend *chr = u->user->chr;
+ uint8_t *p = (uint8_t *) msg;
+ int r, size;
+
+ if (vhost_user_read_header(dev, msg) < 0) {
+ return -1;
}
/* validate message size is sane */
@@ -241,7 +266,7 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
error_report("Failed to read msg header."
" Size %d exceeds the maximum %zu.", msg->hdr.size,
VHOST_USER_PAYLOAD_SIZE);
- goto fail;
+ return -1;
}
if (msg->hdr.size) {
@@ -251,14 +276,11 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
if (r != size) {
error_report("Failed to read msg payload."
" Read %d instead of %d.", r, msg->hdr.size);
- goto fail;
+ return -1;
}
}
return 0;
-
-fail:
- return -1;
}
static int process_message_reply(struct vhost_dev *dev,
@@ -968,7 +990,10 @@ static void slave_read(void *opaque)
iov.iov_base = &hdr;
iov.iov_len = VHOST_USER_HDR_SIZE;
- size = recvmsg(u->slave_fd, &msgh, 0);
+ do {
+ size = recvmsg(u->slave_fd, &msgh, 0);
+ } while (size < 0 && (errno == EINTR || errno == EAGAIN));
+
if (size != VHOST_USER_HDR_SIZE) {
error_report("Failed to read from slave.");
goto err;
@@ -997,7 +1022,10 @@ static void slave_read(void *opaque)
}
/* Read payload */
- size = read(u->slave_fd, &payload, hdr.size);
+ do {
+ size = read(u->slave_fd, &payload, hdr.size);
+ } while (size < 0 && (errno == EINTR || errno == EAGAIN));
+
if (size != hdr.size) {
error_report("Failed to read payload from slave.");
goto err;
@@ -1045,7 +1073,10 @@ static void slave_read(void *opaque)
iovec[1].iov_base = &payload;
iovec[1].iov_len = hdr.size;
- size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
+ do {
+ size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
+ } while (size < 0 && (errno == EINTR || errno == EAGAIN));
+
if (size != VHOST_USER_HDR_SIZE + hdr.size) {
error_report("Failed to send msg reply to slave.");
goto err;
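All three retry loops added above follow the same pattern: recvmsg(), read(), and writev() on the slave channel can fail transiently with EINTR (interrupted by a signal) or EAGAIN, and treating such failures as fatal would needlessly tear down the channel. The pattern as a standalone helper, sketched with a GCC statement expression (not part of this patch):

    /* Retry a blocking I/O expression while it fails transiently. */
    #define RETRY_ON_TRANSIENT_ERR(expr)                              \
        ({                                                            \
            ssize_t rc_;                                              \
            do {                                                      \
                rc_ = (expr);                                         \
            } while (rc_ < 0 && (errno == EINTR || errno == EAGAIN)); \
            rc_;                                                      \
        })

    /* e.g.: size = RETRY_ON_TRANSIENT_ERR(read(u->slave_fd, &payload, hdr.size)); */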
@@ -1750,17 +1781,118 @@ static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
return result;
}
-VhostUserState *vhost_user_init(void)
+static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
+ uint16_t queue_size,
+ struct vhost_inflight *inflight)
+{
+ void *addr;
+ int fd;
+ struct vhost_user *u = dev->opaque;
+ CharBackend *chr = u->user->chr;
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
+ .hdr.flags = VHOST_USER_VERSION,
+ .payload.inflight.num_queues = dev->nvqs,
+ .payload.inflight.queue_size = queue_size,
+ .hdr.size = sizeof(msg.payload.inflight),
+ };
+
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+ return -1;
+ }
+
+ if (vhost_user_read(dev, &msg) < 0) {
+ return -1;
+ }
+
+ if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
+ error_report("Received unexpected msg type. "
+ "Expected %d received %d",
+ VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
+ return -1;
+ }
+
+ if (msg.hdr.size != sizeof(msg.payload.inflight)) {
+ error_report("Received bad msg size.");
+ return -1;
+ }
+
+ if (!msg.payload.inflight.mmap_size) {
+ return 0;
+ }
+
+ fd = qemu_chr_fe_get_msgfd(chr);
+ if (fd < 0) {
+ error_report("Failed to get mem fd");
+ return -1;
+ }
+
+ addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
+
+ if (addr == MAP_FAILED) {
+ error_report("Failed to mmap mem fd");
+ close(fd);
+ return -1;
+ }
+
+ inflight->addr = addr;
+ inflight->fd = fd;
+ inflight->size = msg.payload.inflight.mmap_size;
+ inflight->offset = msg.payload.inflight.mmap_offset;
+ inflight->queue_size = queue_size;
+
+ return 0;
+}
+
+static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
+ struct vhost_inflight *inflight)
{
- VhostUserState *user = g_new0(struct VhostUserState, 1);
+ VhostUserMsg msg = {
+ .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
+ .hdr.flags = VHOST_USER_VERSION,
+ .payload.inflight.mmap_size = inflight->size,
+ .payload.inflight.mmap_offset = inflight->offset,
+ .payload.inflight.num_queues = dev->nvqs,
+ .payload.inflight.queue_size = inflight->queue_size,
+ .hdr.size = sizeof(msg.payload.inflight),
+ };
- return user;
+ if (!virtio_has_feature(dev->protocol_features,
+ VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
+ return 0;
+ }
+
+ if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
+{
+ if (user->chr) {
+ error_setg(errp, "Cannot initialize vhost-user state");
+ return false;
+ }
+ user->chr = chr;
+ return true;
}
void vhost_user_cleanup(VhostUserState *user)
{
int i;
+ if (!user->chr) {
+ return;
+ }
+
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
if (user->notifier[i].addr) {
object_unparent(OBJECT(&user->notifier[i].mr));
@@ -1768,6 +1900,7 @@ void vhost_user_cleanup(VhostUserState *user)
user->notifier[i].addr = NULL;
}
}
+ user->chr = NULL;
}
const VhostOps user_ops = {
@@ -1801,4 +1934,6 @@ const VhostOps user_ops = {
.vhost_crypto_create_session = vhost_user_crypto_create_session,
.vhost_crypto_close_session = vhost_user_crypto_close_session,
.vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
+ .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
+ .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};
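The two new messages implement crash recovery for vhost-user backends: once VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD is negotiated, QEMU asks the backend for a shared-memory region that tracks in-flight requests (VHOST_USER_GET_INFLIGHT_FD), mmaps the returned fd, and hands the same region back via VHOST_USER_SET_INFLIGHT_FD after a reconnect so the backend can resubmit whatever was pending when it died. On the backend side the region would typically be a sealed memfd; a hedged sketch of that allocation (helper name and layout hypothetical, assuming Linux memfd_create()):

    #define _GNU_SOURCE
    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Allocate the region advertised in the GET_INFLIGHT_FD reply. */
    static int alloc_inflight_region(uint64_t size, int *fd_out, void **addr_out)
    {
        int fd = memfd_create("inflight-region", MFD_ALLOW_SEALING);
        if (fd < 0) {
            return -1;
        }
        if (ftruncate(fd, size) < 0) {
            close(fd);
            return -1;
        }
        void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) {
            close(fd);
            return -1;
        }
        memset(addr, 0, size);   /* fresh region: nothing in flight yet */
        *fd_out = fd;            /* passed as ancillary data with the reply */
        *addr_out = addr;
        return 0;
    }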
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 311432f190..7f61018f2a 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1481,6 +1481,102 @@ void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
hdev->config_ops = ops;
}
+void vhost_dev_free_inflight(struct vhost_inflight *inflight)
+{
+ if (inflight->addr) {
+ qemu_memfd_free(inflight->addr, inflight->size, inflight->fd);
+ inflight->addr = NULL;
+ inflight->fd = -1;
+ }
+}
+
+static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
+ uint64_t new_size)
+{
+ Error *err = NULL;
+ int fd = -1;
+ void *addr = qemu_memfd_alloc("vhost-inflight", new_size,
+ F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+ &fd, &err);
+
+ if (err) {
+ error_report_err(err);
+ return -1;
+ }
+
+ vhost_dev_free_inflight(inflight);
+ inflight->offset = 0;
+ inflight->addr = addr;
+ inflight->fd = fd;
+ inflight->size = new_size;
+
+ return 0;
+}
+
+void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f)
+{
+ if (inflight->addr) {
+ qemu_put_be64(f, inflight->size);
+ qemu_put_be16(f, inflight->queue_size);
+ qemu_put_buffer(f, inflight->addr, inflight->size);
+ } else {
+ qemu_put_be64(f, 0);
+ }
+}
+
+int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
+{
+ uint64_t size;
+
+ size = qemu_get_be64(f);
+ if (!size) {
+ return 0;
+ }
+
+ if (inflight->size != size) {
+ if (vhost_dev_resize_inflight(inflight, size)) {
+ return -1;
+ }
+ }
+ inflight->queue_size = qemu_get_be16(f);
+
+ qemu_get_buffer(f, inflight->addr, size);
+
+ return 0;
+}
+
+int vhost_dev_set_inflight(struct vhost_dev *dev,
+ struct vhost_inflight *inflight)
+{
+ int r;
+
+ if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) {
+ r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
+ if (r) {
+ VHOST_OPS_DEBUG("vhost_set_inflight_fd failed");
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
+int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
+ struct vhost_inflight *inflight)
+{
+ int r;
+
+ if (dev->vhost_ops->vhost_get_inflight_fd) {
+ r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
+ if (r) {
+ VHOST_OPS_DEBUG("vhost_get_inflight_fd failed");
+ return -errno;
+ }
+ }
+
+ return 0;
+}
+
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
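Taken together, the new helpers give a device a complete inflight lifecycle: fetch the region once at first start, resend it on every (re)start, and carry it across migration with the save/load pair (big-endian size, then queue_size, then the raw buffer; a size of 0 means no region). A sketch of the start-path ordering a user such as vhost-user-blk (see the diffstat) might follow:

    /* Sketch: device start path using the inflight helpers above. */
    static int example_start_inflight(struct vhost_dev *hdev,
                                      struct vhost_inflight *inflight,
                                      uint16_t queue_size)
    {
        if (!inflight->addr) {
            /* first start: obtain the region from the backend */
            int r = vhost_dev_get_inflight(hdev, queue_size, inflight);
            if (r < 0) {
                return r;
            }
        }
        /* every start, including after reconnect: hand it back */
        return vhost_dev_set_inflight(hdev, inflight);
    }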
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index e3a65940ef..2112874055 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -82,7 +82,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon,
/* We've partially ballooned part of a host page, but now
* we're trying to balloon part of a different one. Too hard,
* give up on the old partial page */
- free(balloon->pbp);
+ g_free(balloon->pbp);
balloon->pbp = NULL;
}
@@ -107,11 +107,61 @@ static void balloon_inflate_page(VirtIOBalloon *balloon,
* has already reported them, and failing to discard a balloon
* page is not fatal */
- free(balloon->pbp);
+ g_free(balloon->pbp);
balloon->pbp = NULL;
}
}
+static void balloon_deflate_page(VirtIOBalloon *balloon,
+ MemoryRegion *mr, hwaddr offset)
+{
+ void *addr = memory_region_get_ram_ptr(mr) + offset;
+ RAMBlock *rb;
+ size_t rb_page_size;
+ ram_addr_t ram_offset, host_page_base;
+ void *host_addr;
+ int ret;
+
+ /* XXX is there a better way to get to the RAMBlock than via a
+ * host address? */
+ rb = qemu_ram_block_from_host(addr, false, &ram_offset);
+ rb_page_size = qemu_ram_pagesize(rb);
+ host_page_base = ram_offset & ~(rb_page_size - 1);
+
+ if (balloon->pbp
+ && rb == balloon->pbp->rb
+ && host_page_base == balloon->pbp->base) {
+ int subpages = rb_page_size / BALLOON_PAGE_SIZE;
+
+ /*
+ * This means the guest has asked to discard some of the 4kiB
+ * subpages of a host page, but then changed its mind and
+ * asked to keep them after all. It's exceedingly unlikely
+ * for a guest to do this in practice, but handle it anyway,
+ * since getting it wrong could mean discarding memory the
+ * guest is still using. */
+ bitmap_clear(balloon->pbp->bitmap,
+ (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE,
+ subpages);
+
+ if (bitmap_empty(balloon->pbp->bitmap, subpages)) {
+ g_free(balloon->pbp);
+ balloon->pbp = NULL;
+ }
+ }
+
+ host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1));
+
+ /* When a page is deflated, we hint the whole host page it lives
+ * on, since we can't do anything smaller */
+ ret = qemu_madvise(host_addr, rb_page_size, QEMU_MADV_WILLNEED);
+ if (ret != 0) {
+ warn_report("Couldn't MADV_WILLNEED on balloon deflate: %s",
+ strerror(errno));
+        /* Otherwise ignore; failure to hint a page shouldn't be fatal */
+ }
+}
+
static const char *balloon_stat_names[] = {
[VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
[VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
@@ -315,8 +365,15 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
trace_virtio_balloon_handle_output(memory_region_name(section.mr),
pa);
- if (!qemu_balloon_is_inhibited() && vq != s->dvq) {
- balloon_inflate_page(s, section.mr, section.offset_within_region);
+ if (!qemu_balloon_is_inhibited()) {
+ if (vq == s->ivq) {
+ balloon_inflate_page(s, section.mr,
+ section.offset_within_region);
+ } else if (vq == s->dvq) {
+ balloon_deflate_page(s, section.mr, section.offset_within_region);
+ } else {
+ g_assert_not_reached();
+ }
}
memory_region_unref(section.mr);
}
@@ -391,6 +448,7 @@ static bool get_free_page_hints(VirtIOBalloon *dev)
VirtQueueElement *elem;
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtQueue *vq = dev->free_page_vq;
+ bool ret = true;
while (dev->block_iothread) {
qemu_cond_wait(&dev->free_page_cond, &dev->free_page_lock);
@@ -405,13 +463,12 @@ static bool get_free_page_hints(VirtIOBalloon *dev)
uint32_t id;
size_t size = iov_to_buf(elem->out_sg, elem->out_num, 0,
&id, sizeof(id));
- virtqueue_push(vq, elem, size);
- g_free(elem);
virtio_tswap32s(vdev, &id);
if (unlikely(size != sizeof(id))) {
virtio_error(vdev, "received an incorrect cmd id");
- return false;
+ ret = false;
+ goto out;
}
if (id == dev->free_page_report_cmd_id) {
dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
@@ -431,11 +488,12 @@ static bool get_free_page_hints(VirtIOBalloon *dev)
qemu_guest_free_page_hint(elem->in_sg[0].iov_base,
elem->in_sg[0].iov_len);
}
- virtqueue_push(vq, elem, 1);
- g_free(elem);
}
- return true;
+out:
+ virtqueue_push(vq, elem, 1);
+ g_free(elem);
+ return ret;
}
static void virtio_ballloon_get_free_page_hints(void *opaque)
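Both the inflate path and the new deflate path reason in fixed 4 KiB balloon pages against a possibly larger host page (for example 64 KiB on some ppc64 or aarch64 hosts), so one host page spans several balloon subpages tracked in pbp->bitmap. The index arithmetic, as a standalone sketch (BALLOON_PAGE_SIZE is 4096, as in the virtio-balloon code):

    #include <stdint.h>

    #define BALLOON_PAGE_SIZE 4096

    /* A 64 KiB host page holds 16 subpages; an offset 0x3000 past the
     * host page base maps to subpage index 3. */
    static uint64_t host_page_base_of(uint64_t ram_offset, uint64_t rb_page_size)
    {
        return ram_offset & ~(rb_page_size - 1);
    }

    static unsigned subpage_of(uint64_t ram_offset, uint64_t host_page_base)
    {
        return (ram_offset - host_page_base) / BALLOON_PAGE_SIZE;
    }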
diff --git a/include/block/block.h b/include/block/block.h
index 6a758a76f8..e452988b66 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -187,6 +187,9 @@ typedef struct BDRVReopenState {
BlockDriverState *bs;
int flags;
BlockdevDetectZeroesOptions detect_zeroes;
+ bool backing_missing;
+ bool replace_backing_bs; /* new_backing_bs is ignored if this is false */
+ BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */
uint64_t perm, shared_perm;
QDict *options;
QDict *explicit_options;
@@ -299,8 +302,9 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, QDict *options);
-int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp);
+ BlockDriverState *bs, QDict *options,
+ bool keep_old_opts);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
Error **errp);
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
@@ -353,6 +357,11 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs);
BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
typedef struct BdrvCheckResult {
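The freeze API pins every BdrvChild link on the chain between bs and base (see the new frozen flag in block_int.h below), so a concurrent reopen, commit, or stream cannot replace or detach a node a running operation depends on. Sketched usage around a job, with names hypothetical:

    /* Sketch: keep the chain bs..base stable for the job's lifetime. */
    static int example_job_start(BlockDriverState *bs, BlockDriverState *base,
                                 Error **errp)
    {
        if (bdrv_freeze_backing_chain(bs, base, errp) < 0) {
            return -1;   /* a link in the chain is already frozen */
        }
        /* ... run the job; the chain cannot change underneath it ... */
        bdrv_unfreeze_backing_chain(bs, base);
        return 0;
    }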
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a23cabaddd..01e855a066 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -383,6 +383,14 @@ struct BlockDriver {
/* List of options for creating images, terminated by name == NULL */
QemuOptsList *create_opts;
+ /*
+ * If this driver supports reopening images this contains a
+ * NULL-terminated list of the runtime options that can be
+ * modified. If an option in this list is unspecified during
+ * reopen then it _must_ be reset to its default value or return
+ * an error.
+ */
+ const char *const *mutable_opts;
/*
* Returns 0 for completed check, -errno for internal errors.
@@ -711,6 +719,12 @@ struct BdrvChild {
uint64_t backup_perm;
uint64_t backup_shared_perm;
+ /*
+ * This link is frozen: the child can neither be replaced nor
+ * detached from the parent.
+ */
+ bool frozen;
+
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
};
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 04a117fc81..8044ace63e 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -5,6 +5,16 @@
#include "qapi/qapi-types-block-core.h"
#include "qemu/hbitmap.h"
+typedef enum BitmapCheckFlags {
+ BDRV_BITMAP_BUSY = 1,
+ BDRV_BITMAP_RO = 2,
+ BDRV_BITMAP_INCONSISTENT = 4,
+} BitmapCheckFlags;
+
+#define BDRV_BITMAP_DEFAULT (BDRV_BITMAP_BUSY | BDRV_BITMAP_RO | \
+ BDRV_BITMAP_INCONSISTENT)
+#define BDRV_BITMAP_ALLOW_RO (BDRV_BITMAP_BUSY | BDRV_BITMAP_INCONSISTENT)
+
BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
uint32_t granularity,
const char *name,
@@ -24,6 +34,8 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
const char *name);
+int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags,
+ Error **errp);
void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
@@ -36,7 +48,7 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap);
bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_has_successor(BdrvDirtyBitmap *bitmap);
const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
@@ -66,9 +78,10 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value);
-void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap,
+void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap,
bool persistent);
-void bdrv_dirty_bitmap_set_qmp_locked(BdrvDirtyBitmap *bitmap, bool qmp_locked);
+void bdrv_dirty_bitmap_set_inconsistent(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_set_busy(BdrvDirtyBitmap *bitmap, bool busy);
void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
HBitmap **backup, Error **errp);
void bdrv_dirty_bitmap_set_migration(BdrvDirtyBitmap *bitmap, bool migration);
@@ -90,9 +103,8 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes);
bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap);
bool bdrv_has_readonly_bitmaps(BlockDriverState *bs);
bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_qmp_locked(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_user_locked(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap);
bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap);
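bdrv_dirty_bitmap_check() replaces the scattered busy/read-only/inconsistent tests (compare the migration and NBD hunks further down) with one call parameterized by BitmapCheckFlags: BDRV_BITMAP_DEFAULT rejects all three conditions, while BDRV_BITMAP_ALLOW_RO tolerates read-only bitmaps. A sketch of the typical caller shape:

    /* Sketch: claim a bitmap for an operation, rejecting unusable ones. */
    static int example_claim_bitmap(BdrvDirtyBitmap *bm, bool allow_ro,
                                    Error **errp)
    {
        uint32_t flags = allow_ro ? BDRV_BITMAP_ALLOW_RO : BDRV_BITMAP_DEFAULT;

        if (bdrv_dirty_bitmap_check(bm, flags, errp)) {
            return -1;   /* errp set: busy, read-only, or inconsistent */
        }
        bdrv_dirty_bitmap_set_busy(bm, true);
        /* ... use the bitmap, then bdrv_dirty_bitmap_set_busy(bm, false) ... */
        return 0;
    }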
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 59aeb06393..41568d1837 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -74,6 +74,8 @@ extern const VMStateDescription vmstate_ich9_pm;
void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm, Error **errp);
+void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp);
void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp);
void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev,
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index a321cc9691..c11e3d5b34 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -66,11 +66,20 @@ typedef struct VTDIOTLBEntry VTDIOTLBEntry;
typedef struct VTDBus VTDBus;
typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
+typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
+typedef struct VTDPASIDEntry VTDPASIDEntry;
/* Context-Entry */
struct VTDContextEntry {
- uint64_t lo;
- uint64_t hi;
+ union {
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ };
+ struct {
+ uint64_t val[4];
+ };
+ };
};
struct VTDContextCacheEntry {
@@ -81,6 +90,16 @@ struct VTDContextCacheEntry {
struct VTDContextEntry context_entry;
};
+/* PASID Directory Entry */
+struct VTDPASIDDirEntry {
+ uint64_t val;
+};
+
+/* PASID Table Entry */
+struct VTDPASIDEntry {
+ uint64_t val[8];
+};
+
struct VTDAddressSpace {
PCIBus *bus;
uint8_t devfn;
@@ -208,16 +227,19 @@ struct IntelIOMMUState {
uint8_t womask[DMAR_REG_SIZE]; /* WO (write only - read returns 0) */
uint32_t version;
- bool caching_mode; /* RO - is cap CM enabled? */
+ bool caching_mode; /* RO - is cap CM enabled? */
+ bool scalable_mode; /* RO - is Scalable Mode supported? */
dma_addr_t root; /* Current root table pointer */
bool root_extended; /* Type of root table (extended or not) */
+ bool root_scalable; /* Type of root table (scalable or not) */
bool dmar_enabled; /* Set if DMA remapping is enabled */
uint16_t iq_head; /* Current invalidation queue head */
uint16_t iq_tail; /* Current invalidation queue tail */
dma_addr_t iq; /* Current invalidation queue pointer */
uint16_t iq_size; /* IQ Size in number of entries */
+ bool iq_dw; /* IQ descriptor width 256bit or not */
bool qi_enabled; /* Set if the QI is enabled */
uint8_t iq_last_desc_type; /* The type of last completed descriptor */
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 5b82a0d244..e30334d74d 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -79,6 +79,9 @@ struct PCIExpressDevice {
/* Offset of ATS capability in config space */
uint16_t ats_cap;
+
+ /* ACS */
+ uint16_t acs_cap;
};
#define COMPAT_PROP_PCP "power_controller_present"
@@ -128,6 +131,9 @@ void pcie_add_capability(PCIDevice *dev,
uint16_t offset, uint16_t size);
void pcie_sync_bridge_lnk(PCIDevice *dev);
+void pcie_acs_init(PCIDevice *dev, uint16_t offset);
+void pcie_acs_reset(PCIDevice *dev);
+
void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
void pcie_ats_init(PCIDevice *dev, uint16_t offset);
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index df242a0caf..09586f4641 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -78,6 +78,7 @@ typedef struct PCIERootPortClass {
int exp_offset;
int aer_offset;
int ssvid_offset;
+ int acs_offset; /* If nonzero, optional ACS capability offset */
int ssid;
} PCIERootPortClass;
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index ad4e7808b8..1db86b0ec4 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -175,4 +175,8 @@ typedef enum PCIExpLinkWidth {
PCI_ERR_COR_INTERNAL | \
PCI_ERR_COR_HL_OVERFLOW)
+/* ACS */
+#define PCI_ACS_VER 0x1
+#define PCI_ACS_SIZEOF 8
+
#endif /* QEMU_PCIE_REGS_H */
diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
index 81283ec50f..d6632a18e6 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -25,6 +25,7 @@ typedef enum VhostSetConfigType {
VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;
+struct vhost_inflight;
struct vhost_dev;
struct vhost_log;
struct vhost_memory;
@@ -104,6 +105,13 @@ typedef int (*vhost_crypto_close_session_op)(struct vhost_dev *dev,
typedef bool (*vhost_backend_mem_section_filter_op)(struct vhost_dev *dev,
MemoryRegionSection *section);
+typedef int (*vhost_get_inflight_fd_op)(struct vhost_dev *dev,
+ uint16_t queue_size,
+ struct vhost_inflight *inflight);
+
+typedef int (*vhost_set_inflight_fd_op)(struct vhost_dev *dev,
+ struct vhost_inflight *inflight);
+
typedef struct VhostOps {
VhostBackendType backend_type;
vhost_backend_init vhost_backend_init;
@@ -142,6 +150,8 @@ typedef struct VhostOps {
vhost_crypto_create_session_op vhost_crypto_create_session;
vhost_crypto_close_session_op vhost_crypto_close_session;
vhost_backend_mem_section_filter_op vhost_backend_mem_section_filter;
+ vhost_get_inflight_fd_op vhost_get_inflight_fd;
+ vhost_set_inflight_fd_op vhost_set_inflight_fd;
} VhostOps;
extern const VhostOps user_ops;
diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h
index d52944aeeb..68634bee61 100644
--- a/include/hw/virtio/vhost-user-blk.h
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -36,7 +36,8 @@ typedef struct VHostUserBlk {
uint32_t queue_size;
uint32_t config_wce;
struct vhost_dev dev;
- VhostUserState *vhost_user;
+ struct vhost_inflight *inflight;
+ VhostUserState vhost_user;
} VHostUserBlk;
#endif
diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
index e429cacd8e..738f9288bd 100644
--- a/include/hw/virtio/vhost-user-scsi.h
+++ b/include/hw/virtio/vhost-user-scsi.h
@@ -30,7 +30,7 @@
typedef struct VHostUserSCSI {
VHostSCSICommon parent_obj;
- VhostUserState *vhost_user;
+ VhostUserState vhost_user;
} VHostUserSCSI;
#endif /* VHOST_USER_SCSI_H */
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index fd660393a0..811e325f42 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -22,7 +22,7 @@ typedef struct VhostUserState {
VhostUserHostNotifier notifier[VIRTIO_QUEUE_MAX];
} VhostUserState;
-VhostUserState *vhost_user_init(void);
+bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp);
void vhost_user_cleanup(VhostUserState *user);
#endif
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index a7f449fa87..619498c8f4 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -7,6 +7,15 @@
#include "exec/memory.h"
/* Generic structures common for any vhost based device. */
+
+struct vhost_inflight {
+ int fd;
+ void *addr;
+ uint64_t size;
+ uint64_t offset;
+ uint16_t queue_size;
+};
+
struct vhost_virtqueue {
int kick;
int call;
@@ -120,4 +129,13 @@ int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data,
*/
void vhost_dev_set_config_notifier(struct vhost_dev *dev,
const VhostDevConfigOps *ops);
+
+void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
+void vhost_dev_free_inflight(struct vhost_inflight *inflight);
+void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f);
+int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f);
+int vhost_dev_set_inflight(struct vhost_dev *dev,
+ struct vhost_inflight *inflight);
+int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
+ struct vhost_inflight *inflight);
#endif
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 6426151e4f..d1bb863cb6 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -261,7 +261,7 @@ static void dirty_bitmap_mig_cleanup(void)
while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry);
- bdrv_dirty_bitmap_set_qmp_locked(dbms->bitmap, false);
+ bdrv_dirty_bitmap_set_busy(dbms->bitmap, false);
bdrv_unref(dbms->bs);
g_free(dbms);
}
@@ -274,6 +274,7 @@ static int init_dirty_bitmap_migration(void)
BdrvDirtyBitmap *bitmap;
DirtyBitmapMigBitmapState *dbms;
BdrvNextIterator it;
+ Error *local_err = NULL;
dirty_bitmap_mig_state.bulk_completed = false;
dirty_bitmap_mig_state.prev_bs = NULL;
@@ -301,20 +302,14 @@ static int init_dirty_bitmap_migration(void)
goto fail;
}
- if (bdrv_dirty_bitmap_user_locked(bitmap)) {
- error_report("Can't migrate a bitmap that is in use by another operation: '%s'",
- bdrv_dirty_bitmap_name(bitmap));
- goto fail;
- }
-
- if (bdrv_dirty_bitmap_readonly(bitmap)) {
- error_report("Can't migrate read-only dirty bitmap: '%s",
- bdrv_dirty_bitmap_name(bitmap));
+ if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT,
+ &local_err)) {
+ error_report_err(local_err);
goto fail;
}
bdrv_ref(bs);
- bdrv_dirty_bitmap_set_qmp_locked(bitmap, true);
+ bdrv_dirty_bitmap_set_busy(bitmap, true);
dbms = g_new0(DirtyBitmapMigBitmapState, 1);
dbms->bs = bs;
@@ -326,7 +321,7 @@ static int init_dirty_bitmap_migration(void)
if (bdrv_dirty_bitmap_enabled(bitmap)) {
dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED;
}
- if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+ if (bdrv_dirty_bitmap_get_persistence(bitmap)) {
dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT;
}
@@ -478,7 +473,7 @@ static int dirty_bitmap_load_start(QEMUFile *f, DirtyBitmapLoadState *s)
}
if (flags & DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT) {
- bdrv_dirty_bitmap_set_persistance(s->bitmap, true);
+ bdrv_dirty_bitmap_set_persistence(s->bitmap, true);
}
bdrv_disable_dirty_bitmap(s->bitmap);
@@ -542,7 +537,7 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DirtyBitmapLoadState *s)
}
}
- if (bdrv_dirty_bitmap_frozen(s->bitmap)) {
+ if (bdrv_dirty_bitmap_has_successor(s->bitmap)) {
bdrv_dirty_bitmap_lock(s->bitmap);
if (enabled_bitmaps == NULL) {
/* in postcopy */
diff --git a/nbd/server.c b/nbd/server.c
index 8ddfd3e319..fd013a2817 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1510,6 +1510,10 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
goto fail;
}
+ if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
+ goto fail;
+ }
+
if ((nbdflags & NBD_FLAG_READ_ONLY) && bdrv_is_writable(bs) &&
bdrv_dirty_bitmap_enabled(bm)) {
error_setg(errp,
@@ -1518,12 +1522,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
goto fail;
}
- if (bdrv_dirty_bitmap_user_locked(bm)) {
- error_setg(errp, "Bitmap '%s' is in use", bitmap);
- goto fail;
- }
-
- bdrv_dirty_bitmap_set_qmp_locked(bm, true);
+ bdrv_dirty_bitmap_set_busy(bm, true);
exp->export_bitmap = bm;
exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
bitmap);
@@ -1641,7 +1640,7 @@ void nbd_export_put(NBDExport *exp)
}
if (exp->export_bitmap) {
- bdrv_dirty_bitmap_set_qmp_locked(exp->export_bitmap, false);
+ bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
g_free(exp->export_bitmap_context);
}
diff --git a/net/vhost-user.c b/net/vhost-user.c
index cd9659df87..5a26a24708 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -304,19 +304,14 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
{
Error *err = NULL;
NetClientState *nc, *nc0 = NULL;
- VhostUserState *user = NULL;
NetVhostUserState *s = NULL;
+ VhostUserState *user;
int i;
assert(name);
assert(queues > 0);
- user = vhost_user_init();
- if (!user) {
- error_report("failed to init vhost_user");
- goto err;
- }
-
+ user = g_new0(struct VhostUserState, 1);
for (i = 0; i < queues; i++) {
nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
@@ -325,11 +320,11 @@ static int net_vhost_user_init(NetClientState *peer, const char *device,
if (!nc0) {
nc0 = nc;
s = DO_UPCAST(NetVhostUserState, nc, nc);
- if (!qemu_chr_fe_init(&s->chr, chr, &err)) {
+ if (!qemu_chr_fe_init(&s->chr, chr, &err) ||
+ !vhost_user_init(user, &s->chr, &err)) {
error_report_err(err);
goto err;
}
- user->chr = &s->chr;
}
s = DO_UPCAST(NetVhostUserState, nc, nc);
s->vhost_user = user;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 919d0530b2..ca684a8a04 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -451,10 +451,15 @@
# recording new writes. If the bitmap was @disabled, it is not
# recording new writes. (Since 2.12)
#
+# @inconsistent: This is a persistent dirty bitmap that was marked in-use on
+# disk, and is unusable by QEMU. It can only be deleted.
+# Please rely on the inconsistent field in @BlockDirtyInfo
+# instead, as the status field is deprecated. (Since 4.0)
+#
# Since: 2.4
##
{ 'enum': 'DirtyBitmapStatus',
- 'data': ['active', 'disabled', 'frozen', 'locked'] }
+ 'data': ['active', 'disabled', 'frozen', 'locked', 'inconsistent'] }
##
# @BlockDirtyInfo:
@@ -467,16 +472,29 @@
#
# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
#
-# @status: current status of the dirty bitmap (since 2.4)
+# @status: Deprecated in favor of @recording and @busy. (since 2.4)
+#
+# @recording: true if the bitmap is recording new writes from the guest.
+# Replaces `active` and `disabled` statuses. (since 4.0)
+#
+# @busy: true if the bitmap is in-use by some operation (NBD or jobs)
+# and cannot be modified via QMP or used by another operation.
+# Replaces `locked` and `frozen` statuses. (since 4.0)
#
-# @persistent: true if the bitmap will eventually be flushed to persistent
-# storage (since 4.0)
+# @persistent: true if the bitmap was stored on disk, is scheduled to be stored
+# on disk, or both. (since 4.0)
+#
+# @inconsistent: true if this is a persistent bitmap that was improperly
+# stored. Implies @persistent to be true; @recording and
+# @busy to be false. This bitmap cannot be used. To remove
+# it, use @block-dirty-bitmap-remove. (Since 4.0)
#
# Since: 1.3
##
{ 'struct': 'BlockDirtyInfo',
'data': {'*name': 'str', 'count': 'int', 'granularity': 'uint32',
- 'status': 'DirtyBitmapStatus', 'persistent': 'bool' } }
+ 'recording': 'bool', 'busy': 'bool', 'status': 'DirtyBitmapStatus',
+ 'persistent': 'bool', '*inconsistent': 'bool' } }
##
# @Qcow2BitmapInfoFlags:
@@ -537,20 +555,20 @@
# +------------------
# 10 50 100
#
-# Since: 2.12
+# Since: 4.0
##
{ 'struct': 'BlockLatencyHistogramInfo',
'data': {'boundaries': ['uint64'], 'bins': ['uint64'] } }
##
-# @x-block-latency-histogram-set:
+# @block-latency-histogram-set:
#
# Manage read, write and flush latency histograms for the device.
#
# If only @device parameter is specified, remove all present latency histograms
# for the device. Otherwise, add/reset some of (or all) latency histograms.
#
-# @device: device name to set latency histogram for.
+# @id: The name or QOM path of the guest device.
#
# @boundaries: list of interval boundary values (see description in
# BlockLatencyHistogramInfo definition). If specified, all
@@ -573,7 +591,7 @@
#
# Returns: error if device is not found or any boundary arrays are invalid.
#
-# Since: 2.12
+# Since: 4.0
#
# Example: set new histograms for all io types with intervals
# [0, 10), [10, 50), [50, 100), [100, +inf):
@@ -607,8 +625,8 @@
# "arguments": { "device": "drive0" } }
# <- { "return": {} }
##
-{ 'command': 'x-block-latency-histogram-set',
- 'data': {'device': 'str',
+{ 'command': 'block-latency-histogram-set',
+ 'data': {'id': 'str',
'*boundaries': ['uint64'],
'*boundaries-read': ['uint64'],
'*boundaries-write': ['uint64'],
@@ -894,11 +912,11 @@
# @timed_stats: Statistics specific to the set of previously defined
# intervals of time (Since 2.5)
#
-# @x_rd_latency_histogram: @BlockLatencyHistogramInfo. (Since 2.12)
+# @rd_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
#
-# @x_wr_latency_histogram: @BlockLatencyHistogramInfo. (Since 2.12)
+# @wr_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
#
-# @x_flush_latency_histogram: @BlockLatencyHistogramInfo. (Since 2.12)
+# @flush_latency_histogram: @BlockLatencyHistogramInfo. (Since 4.0)
#
# Since: 0.14.0
##
@@ -913,9 +931,9 @@
'invalid_wr_operations': 'int', 'invalid_flush_operations': 'int',
'account_invalid': 'bool', 'account_failed': 'bool',
'timed_stats': ['BlockDeviceTimedStats'],
- '*x_rd_latency_histogram': 'BlockLatencyHistogramInfo',
- '*x_wr_latency_histogram': 'BlockLatencyHistogramInfo',
- '*x_flush_latency_histogram': 'BlockLatencyHistogramInfo' } }
+ '*rd_latency_histogram': 'BlockLatencyHistogramInfo',
+ '*wr_latency_histogram': 'BlockLatencyHistogramInfo',
+ '*flush_latency_histogram': 'BlockLatencyHistogramInfo' } }
##
# @BlockStats:
@@ -3998,6 +4016,48 @@
{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true }
##
+# @x-blockdev-reopen:
+#
+# Reopens a block device using the given set of options. Any option
+# not specified will be reset to its default value regardless of its
+# previous value. If an option cannot be changed or a particular
+# driver does not support reopening then the command will return an
+# error.
+#
+# The top-level @node-name option (from BlockdevOptions) must be
+# specified and is used to select the block device to be reopened.
+# Other @node-name options must be either omitted or set to the
+# current name of the appropriate node. This command won't change any
+# node name; any attempt to do so will result in an error.
+#
+# In the case of options that refer to child nodes, the behavior of
+# this command depends on the value:
+#
+# 1) A set of options (BlockdevOptions): the child is reopened with
+# the specified set of options.
+#
+# 2) A reference to the current child: the child is reopened using
+# its existing set of options.
+#
+# 3) A reference to a different node: the current child is replaced
+# with the specified one.
+#
+# 4) NULL: the current child (if any) is detached.
+#
+# Options (1) and (2) are supported in all cases, but at the moment
+# only @backing allows replacing or detaching an existing child.
+#
+# Unlike with blockdev-add, the @backing option must always be present
+# unless the node being reopened does not have a backing file and its
+# image does not have a default backing file name as part of its
+# metadata.
+#
+# Since: 4.0
+##
+{ 'command': 'x-blockdev-reopen',
+ 'data': 'BlockdevOptions', 'boxed': true }
+
+##
# @blockdev-del:
#
# Deletes a block device that has been added using blockdev-add.
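As a hedged illustration of case (3) above, replacing a node's backing file while leaving its file child untouched might look like this (node names hypothetical, following the example notation used elsewhere in this schema):

    -> { "execute": "x-blockdev-reopen",
         "arguments": { "driver": "qcow2", "node-name": "top",
                        "file": "top-file", "backing": "new-base" } }
    <- { "return": {} }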
diff --git a/qapi/ui.json b/qapi/ui.json
index c5d1d7f099..59e412139a 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1081,6 +1081,19 @@
'data' : [ 'off', 'on', 'core', 'es' ] }
##
+# @DisplayCurses:
+#
+# Curses display options.
+#
+# @charset: Font charset used by the guest (default: CP437).
+#
+# Since: 4.0
+#
+##
+{ 'struct' : 'DisplayCurses',
+ 'data' : { '*charset' : 'str' } }
+
+##
# @DisplayType:
#
# Display (user interface) type.
@@ -1142,6 +1155,7 @@
'*gl' : 'DisplayGLMode' },
'discriminator' : 'type',
'data' : { 'gtk' : 'DisplayGTK',
+ 'curses' : 'DisplayCurses',
'egl-headless' : 'DisplayEGLHeadless'} }
##
diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi
index 1cf10fc78b..2219386769 100644
--- a/qemu-deprecated.texi
+++ b/qemu-deprecated.texi
@@ -79,6 +79,12 @@ the current values of the environment variables to ``-audiodev'' options.
"autoload" parameter is now ignored. All bitmaps are automatically loaded
from qcow2 images.
+@subsection query-block result field dirty-bitmaps[i].status (since 4.0)
+
+The ``status'' field of the ``BlockDirtyInfo'' structure, returned by
+the query-block command, is deprecated. Two new boolean fields,
+``recording'' and ``busy'', effectively replace it.
+
@subsection query-cpus (since 2.12.0)
The ``query-cpus'' command is replaced by the ``query-cpus-fast'' command.
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index b9f189f09b..35dcdcf413 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -2080,8 +2080,8 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
}
bdrv_subtree_drained_begin(bs);
- brq = bdrv_reopen_queue(NULL, bs, opts);
- bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
+ brq = bdrv_reopen_queue(NULL, bs, opts, true);
+ bdrv_reopen_multiple(brq, &local_err);
bdrv_subtree_drained_end(bs);
if (local_err) {
diff --git a/qemu-options.hx b/qemu-options.hx
index 8693f5fa3c..08749a3391 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1446,7 +1446,7 @@ DEF("display", HAS_ARG, QEMU_OPTION_display,
" [,window_close=on|off][,gl=on|core|es|off]\n"
"-display gtk[,grab_on_hover=on|off][,gl=on|off]|\n"
"-display vnc=<display>[,<optargs>]\n"
- "-display curses\n"
+ "-display curses[,charset=<encoding>]\n"
"-display none\n"
"-display egl-headless[,rendernode=<file>]"
" select display type\n"
@@ -1478,6 +1478,9 @@ support a text mode, QEMU can display this output using a
curses/ncurses interface. Nothing is displayed when the graphics
device is in graphical mode or if the graphics device does not support
a text mode. Generally only the VGA device models support text mode.
+The font charset used by the guest can be specified with the
+@code{charset} option, for example @code{charset=CP850} for IBM CP850
+encoding. The default is @code{CP437}.
@item none
Do not display video output. The guest will still see an emulated
graphics card, but its output will not be displayed to the QEMU
diff --git a/target/riscv/Makefile.objs b/target/riscv/Makefile.objs
index 4072abe3e4..9c6c109327 100644
--- a/target/riscv/Makefile.objs
+++ b/target/riscv/Makefile.objs
@@ -1 +1,20 @@
obj-y += translate.o op_helper.o cpu_helper.o cpu.o csr.o fpu_helper.o gdbstub.o pmp.o
+
+DECODETREE = $(SRC_PATH)/scripts/decodetree.py
+
+decode32-y = $(SRC_PATH)/target/riscv/insn32.decode
+decode32-$(TARGET_RISCV64) += $(SRC_PATH)/target/riscv/insn32-64.decode
+
+target/riscv/decode_insn32.inc.c: $(decode32-y) $(DECODETREE)
+ $(call quiet-command, \
+ $(PYTHON) $(DECODETREE) -o $@ --decode decode_insn32 $(decode32-y), \
+ "GEN", $(TARGET_DIR)$@)
+
+target/riscv/decode_insn16.inc.c: \
+ $(SRC_PATH)/target/riscv/insn16.decode $(DECODETREE)
+ $(call quiet-command, \
+ $(PYTHON) $(DECODETREE) -o $@ --decode decode_insn16 --insnwidth 16 $<, \
+ "GEN", $(TARGET_DIR)$@)
+
+target/riscv/translate.o: target/riscv/decode_insn32.inc.c \
+ target/riscv/decode_insn16.inc.c
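decodetree.py turns each .decode file into a generated C decoder: the --decode argument names the entry point, and every named pattern produces an argument struct plus a call to a trans_<name>() callback that the target supplies (the insn_trans/*.inc.c files added below). Roughly, the generated entry point behaves like this sketch (not the literal generated code; field extraction shown with QEMU's extract32()/sextract32()):

    /* Sketch of the generated decoder for --decode decode_insn32. */
    static bool decode_insn32(DisasContext *ctx, uint32_t insn)
    {
        if ((insn & 0x707f) == 0x0013) {         /* matches the addi pattern */
            arg_addi a = {
                .imm = sextract32(insn, 20, 12), /* %imm_i: 20:s12 */
                .rs1 = extract32(insn, 15, 5),
                .rd  = extract32(insn, 7, 5),
            };
            return trans_addi(ctx, &a);          /* true: insn handled */
        }
        /* ... remaining patterns ... */
        return false;
    }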
diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
new file mode 100644
index 0000000000..17cc52cf2a
--- /dev/null
+++ b/target/riscv/insn16.decode
@@ -0,0 +1,129 @@
+#
+# RISC-V translation routines for the RVXI Base Integer Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+# Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Fields:
+%rd 7:5
+%rs1_3 7:3 !function=ex_rvc_register
+%rs2_3 2:3 !function=ex_rvc_register
+%rs2_5 2:5
+
+# Immediates:
+%imm_ci 12:s1 2:5
+%nzuimm_ciw 7:4 11:2 5:1 6:1 !function=ex_shift_2
+%uimm_cl_d 5:2 10:3 !function=ex_shift_3
+%uimm_cl_w 5:1 10:3 6:1 !function=ex_shift_2
+%imm_cb 12:s1 5:2 2:1 10:2 3:2 !function=ex_shift_1
+%imm_cj 12:s1 8:1 9:2 6:1 7:1 2:1 11:1 3:3 !function=ex_shift_1
+
+%nzuimm_6bit 12:1 2:5
+%uimm_6bit_ld 2:3 12:1 5:2 !function=ex_shift_3
+%uimm_6bit_lw 2:2 12:1 4:3 !function=ex_shift_2
+%uimm_6bit_sd 7:3 10:3 !function=ex_shift_3
+%uimm_6bit_sw 7:2 9:4 !function=ex_shift_2
+
+%imm_addi16sp 12:s1 3:2 5:1 2:1 6:1 !function=ex_shift_4
+%imm_lui 12:s1 2:5 !function=ex_shift_12
+
+
+
+# Argument sets:
+&cl rs1 rd
+&cl_dw uimm rs1 rd
+&ci imm rd
+&ciw nzuimm rd
+&cs rs1 rs2
+&cs_dw uimm rs1 rs2
+&cb imm rs1
+&cr rd rs2
+&cj imm
+&c_shift shamt rd
+
+&c_ld uimm rd
+&c_sd uimm rs2
+
+&caddi16sp_lui imm_lui imm_addi16sp rd
+&cflwsp_ldsp uimm_flwsp uimm_ldsp rd
+&cfswsp_sdsp uimm_fswsp uimm_sdsp rs2
+
+# Formats 16:
+@cr .... ..... ..... .. &cr rs2=%rs2_5 %rd
+@ci ... . ..... ..... .. &ci imm=%imm_ci %rd
+@ciw ... ........ ... .. &ciw nzuimm=%nzuimm_ciw rd=%rs2_3
+@cl_d ... ... ... .. ... .. &cl_dw uimm=%uimm_cl_d rs1=%rs1_3 rd=%rs2_3
+@cl_w ... ... ... .. ... .. &cl_dw uimm=%uimm_cl_w rs1=%rs1_3 rd=%rs2_3
+@cl ... ... ... .. ... .. &cl rs1=%rs1_3 rd=%rs2_3
+@cs ... ... ... .. ... .. &cs rs1=%rs1_3 rs2=%rs2_3
+@cs_2 ... ... ... .. ... .. &cr rd=%rs1_3 rs2=%rs2_3
+@cs_d ... ... ... .. ... .. &cs_dw uimm=%uimm_cl_d rs1=%rs1_3 rs2=%rs2_3
+@cs_w ... ... ... .. ... .. &cs_dw uimm=%uimm_cl_w rs1=%rs1_3 rs2=%rs2_3
+@cb ... ... ... .. ... .. &cb imm=%imm_cb rs1=%rs1_3
+@cj ... ........... .. &cj imm=%imm_cj
+
+@c_ld ... . ..... ..... .. &c_ld uimm=%uimm_6bit_ld %rd
+@c_lw ... . ..... ..... .. &c_ld uimm=%uimm_6bit_lw %rd
+@c_sd ... . ..... ..... .. &c_sd uimm=%uimm_6bit_sd rs2=%rs2_5
+@c_sw ... . ..... ..... .. &c_sd uimm=%uimm_6bit_sw rs2=%rs2_5
+
+@c_addi16sp_lui ... . ..... ..... .. &caddi16sp_lui %imm_lui %imm_addi16sp %rd
+@c_flwsp_ldsp ... . ..... ..... .. &cflwsp_ldsp uimm_flwsp=%uimm_6bit_lw \
+ uimm_ldsp=%uimm_6bit_ld %rd
+@c_fswsp_sdsp ... . ..... ..... .. &cfswsp_sdsp uimm_fswsp=%uimm_6bit_sw \
+ uimm_sdsp=%uimm_6bit_sd rs2=%rs2_5
+
+@c_shift ... . .. ... ..... .. &c_shift rd=%rs1_3 shamt=%nzuimm_6bit
+@c_shift2 ... . .. ... ..... .. &c_shift rd=%rd shamt=%nzuimm_6bit
+
+@c_andi ... . .. ... ..... .. &ci imm=%imm_ci rd=%rs1_3
+
+# *** RV64C Standard Extension (Quadrant 0) ***
+c_addi4spn 000 ........ ... 00 @ciw
+c_fld 001 ... ... .. ... 00 @cl_d
+c_lw 010 ... ... .. ... 00 @cl_w
+c_flw_ld 011 --- ... -- ... 00 @cl #Note: Must parse uimm manually
+c_fsd 101 ... ... .. ... 00 @cs_d
+c_sw 110 ... ... .. ... 00 @cs_w
+c_fsw_sd 111 --- ... -- ... 00 @cs #Note: Must parse uimm manually
+
+# *** RV64C Standard Extension (Quadrant 1) ***
+c_addi 000 . ..... ..... 01 @ci
+c_jal_addiw 001 . ..... ..... 01 @ci #Note: parse rd and/or imm manually
+c_li 010 . ..... ..... 01 @ci
+c_addi16sp_lui 011 . ..... ..... 01 @c_addi16sp_lui # shares opc with C.LUI
+c_srli 100 . 00 ... ..... 01 @c_shift
+c_srai 100 . 01 ... ..... 01 @c_shift
+c_andi 100 . 10 ... ..... 01 @c_andi
+c_sub 100 0 11 ... 00 ... 01 @cs_2
+c_xor 100 0 11 ... 01 ... 01 @cs_2
+c_or 100 0 11 ... 10 ... 01 @cs_2
+c_and 100 0 11 ... 11 ... 01 @cs_2
+c_subw 100 1 11 ... 00 ... 01 @cs_2
+c_addw 100 1 11 ... 01 ... 01 @cs_2
+c_j 101 ........... 01 @cj
+c_beqz 110 ... ... ..... 01 @cb
+c_bnez 111 ... ... ..... 01 @cb
+
+# *** RV64C Standard Extension (Quadrant 2) ***
+c_slli 000 . ..... ..... 10 @c_shift2
+c_fldsp 001 . ..... ..... 10 @c_ld
+c_lwsp 010 . ..... ..... 10 @c_lw
+c_flwsp_ldsp 011 . ..... ..... 10 @c_flwsp_ldsp #C.LDSP:RV64;C.FLWSP:RV32
+c_jr_mv 100 0 ..... ..... 10 @cr
+c_ebreak_jalr_add 100 1 ..... ..... 10 @cr
+c_fsdsp 101 ...... ..... 10 @c_sd
+c_swsp 110 . ..... ..... 10 @c_sw
+c_fswsp_sdsp 111 . ..... ..... 10 @c_fswsp_sdsp #C.SDSP:RV64;C.FSWSP:RV32
diff --git a/target/riscv/insn32-64.decode b/target/riscv/insn32-64.decode
new file mode 100644
index 0000000000..380bf791bc
--- /dev/null
+++ b/target/riscv/insn32-64.decode
@@ -0,0 +1,72 @@
+#
+# RISC-V translation routines for the RV Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+# Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This is concatenated with insn32.decode for riscv64 targets.
+# Most of the fields and formats are there.
+
+%sh5 20:5
+
+@sh5 ....... ..... ..... ... ..... ....... &shift shamt=%sh5 %rs1 %rd
+
+# *** RV64I Base Instruction Set (in addition to RV32I) ***
+lwu ............ ..... 110 ..... 0000011 @i
+ld ............ ..... 011 ..... 0000011 @i
+sd ....... ..... ..... 011 ..... 0100011 @s
+addiw ............ ..... 000 ..... 0011011 @i
+slliw 0000000 ..... ..... 001 ..... 0011011 @sh5
+srliw 0000000 ..... ..... 101 ..... 0011011 @sh5
+sraiw 0100000 ..... ..... 101 ..... 0011011 @sh5
+addw 0000000 ..... ..... 000 ..... 0111011 @r
+subw 0100000 ..... ..... 000 ..... 0111011 @r
+sllw 0000000 ..... ..... 001 ..... 0111011 @r
+srlw 0000000 ..... ..... 101 ..... 0111011 @r
+sraw 0100000 ..... ..... 101 ..... 0111011 @r
+
+# *** RV64M Standard Extension (in addition to RV32M) ***
+mulw 0000001 ..... ..... 000 ..... 0111011 @r
+divw 0000001 ..... ..... 100 ..... 0111011 @r
+divuw 0000001 ..... ..... 101 ..... 0111011 @r
+remw 0000001 ..... ..... 110 ..... 0111011 @r
+remuw 0000001 ..... ..... 111 ..... 0111011 @r
+
+# *** RV64A Standard Extension (in addition to RV32A) ***
+lr_d 00010 . . 00000 ..... 011 ..... 0101111 @atom_ld
+sc_d 00011 . . ..... ..... 011 ..... 0101111 @atom_st
+amoswap_d 00001 . . ..... ..... 011 ..... 0101111 @atom_st
+amoadd_d 00000 . . ..... ..... 011 ..... 0101111 @atom_st
+amoxor_d 00100 . . ..... ..... 011 ..... 0101111 @atom_st
+amoand_d 01100 . . ..... ..... 011 ..... 0101111 @atom_st
+amoor_d 01000 . . ..... ..... 011 ..... 0101111 @atom_st
+amomin_d 10000 . . ..... ..... 011 ..... 0101111 @atom_st
+amomax_d 10100 . . ..... ..... 011 ..... 0101111 @atom_st
+amominu_d 11000 . . ..... ..... 011 ..... 0101111 @atom_st
+amomaxu_d 11100 . . ..... ..... 011 ..... 0101111 @atom_st
+
+# *** RV64F Standard Extension (in addition to RV32F) ***
+fcvt_l_s 1100000 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_lu_s 1100000 00011 ..... ... ..... 1010011 @r2_rm
+fcvt_s_l 1101000 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_s_lu 1101000 00011 ..... ... ..... 1010011 @r2_rm
+
+# *** RV64D Standard Extension (in addition to RV32D) ***
+fcvt_l_d 1100001 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_lu_d 1100001 00011 ..... ... ..... 1010011 @r2_rm
+fmv_x_d 1110001 00000 ..... 000 ..... 1010011 @r2
+fcvt_d_l 1101001 00010 ..... ... ..... 1010011 @r2_rm
+fcvt_d_lu 1101001 00011 ..... ... ..... 1010011 @r2_rm
+fmv_d_x 1111001 00000 ..... 000 ..... 1010011 @r2
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
new file mode 100644
index 0000000000..6f3ab7aa52
--- /dev/null
+++ b/target/riscv/insn32.decode
@@ -0,0 +1,201 @@
+#
+# RISC-V translation routines for the RVXI Base Integer Instruction Set.
+#
+# Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+# Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2 or later, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+
+# Fields:
+%rs3 27:5
+%rs2 20:5
+%rs1 15:5
+%rd 7:5
+
+%sh10 20:10
+%csr 20:12
+%rm 12:3
+
+# immediates:
+%imm_i 20:s12
+%imm_s 25:s7 7:5
+%imm_b 31:s1 7:1 25:6 8:4 !function=ex_shift_1
+%imm_j 31:s1 12:8 20:1 21:10 !function=ex_shift_1
+%imm_u 12:s20 !function=ex_shift_12
+
+# Argument sets:
+&b imm rs2 rs1
+&i imm rs1 rd
+&r rd rs1 rs2
+&shift shamt rs1 rd
+&atomic aq rl rs2 rs1 rd
+
+# Formats 32:
+@r ....... ..... ..... ... ..... ....... &r %rs2 %rs1 %rd
+@i ............ ..... ... ..... ....... &i imm=%imm_i %rs1 %rd
+@b ....... ..... ..... ... ..... ....... &b imm=%imm_b %rs2 %rs1
+@s ....... ..... ..... ... ..... ....... imm=%imm_s %rs2 %rs1
+@u .................... ..... ....... imm=%imm_u %rd
+@j .................... ..... ....... imm=%imm_j %rd
+
+@sh ...... ...... ..... ... ..... ....... &shift shamt=%sh10 %rs1 %rd
+@csr ............ ..... ... ..... ....... %csr %rs1 %rd
+
+@atom_ld ..... aq:1 rl:1 ..... ........ ..... ....... &atomic rs2=0 %rs1 %rd
+@atom_st ..... aq:1 rl:1 ..... ........ ..... ....... &atomic %rs2 %rs1 %rd
+
+@r4_rm ..... .. ..... ..... ... ..... ....... %rs3 %rs2 %rs1 %rm %rd
+@r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd
+@r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd
+@r2 ....... ..... ..... ... ..... ....... %rs1 %rd
+
+@sfence_vma ....... ..... ..... ... ..... ....... %rs2 %rs1
+@sfence_vm ....... ..... ..... ... ..... ....... %rs1
+
+
+# *** Privileged Instructions ***
+ecall 000000000000 00000 000 00000 1110011
+ebreak 000000000001 00000 000 00000 1110011
+uret 0000000 00010 00000 000 00000 1110011
+sret 0001000 00010 00000 000 00000 1110011
+hret 0010000 00010 00000 000 00000 1110011
+mret 0011000 00010 00000 000 00000 1110011
+wfi 0001000 00101 00000 000 00000 1110011
+sfence_vma 0001001 ..... ..... 000 00000 1110011 @sfence_vma
+sfence_vm 0001000 00100 ..... 000 00000 1110011 @sfence_vm
+
+# *** RV32I Base Instruction Set ***
+lui .................... ..... 0110111 @u
+auipc .................... ..... 0010111 @u
+jal .................... ..... 1101111 @j
+jalr ............ ..... 000 ..... 1100111 @i
+beq ....... ..... ..... 000 ..... 1100011 @b
+bne ....... ..... ..... 001 ..... 1100011 @b
+blt ....... ..... ..... 100 ..... 1100011 @b
+bge ....... ..... ..... 101 ..... 1100011 @b
+bltu ....... ..... ..... 110 ..... 1100011 @b
+bgeu ....... ..... ..... 111 ..... 1100011 @b
+lb ............ ..... 000 ..... 0000011 @i
+lh ............ ..... 001 ..... 0000011 @i
+lw ............ ..... 010 ..... 0000011 @i
+lbu ............ ..... 100 ..... 0000011 @i
+lhu ............ ..... 101 ..... 0000011 @i
+sb ....... ..... ..... 000 ..... 0100011 @s
+sh ....... ..... ..... 001 ..... 0100011 @s
+sw ....... ..... ..... 010 ..... 0100011 @s
+addi ............ ..... 000 ..... 0010011 @i
+slti ............ ..... 010 ..... 0010011 @i
+sltiu ............ ..... 011 ..... 0010011 @i
+xori ............ ..... 100 ..... 0010011 @i
+ori ............ ..... 110 ..... 0010011 @i
+andi ............ ..... 111 ..... 0010011 @i
+slli 00.... ...... ..... 001 ..... 0010011 @sh
+srli 00.... ...... ..... 101 ..... 0010011 @sh
+srai 01.... ...... ..... 101 ..... 0010011 @sh
+add 0000000 ..... ..... 000 ..... 0110011 @r
+sub 0100000 ..... ..... 000 ..... 0110011 @r
+sll 0000000 ..... ..... 001 ..... 0110011 @r
+slt 0000000 ..... ..... 010 ..... 0110011 @r
+sltu 0000000 ..... ..... 011 ..... 0110011 @r
+xor 0000000 ..... ..... 100 ..... 0110011 @r
+srl 0000000 ..... ..... 101 ..... 0110011 @r
+sra 0100000 ..... ..... 101 ..... 0110011 @r
+or 0000000 ..... ..... 110 ..... 0110011 @r
+and 0000000 ..... ..... 111 ..... 0110011 @r
+fence ---- pred:4 succ:4 ----- 000 ----- 0001111
+fence_i ---- ---- ---- ----- 001 ----- 0001111
+csrrw ............ ..... 001 ..... 1110011 @csr
+csrrs ............ ..... 010 ..... 1110011 @csr
+csrrc ............ ..... 011 ..... 1110011 @csr
+csrrwi ............ ..... 101 ..... 1110011 @csr
+csrrsi ............ ..... 110 ..... 1110011 @csr
+csrrci ............ ..... 111 ..... 1110011 @csr
+
+# *** RV32M Standard Extension ***
+mul 0000001 ..... ..... 000 ..... 0110011 @r
+mulh 0000001 ..... ..... 001 ..... 0110011 @r
+mulhsu 0000001 ..... ..... 010 ..... 0110011 @r
+mulhu 0000001 ..... ..... 011 ..... 0110011 @r
+div 0000001 ..... ..... 100 ..... 0110011 @r
+divu 0000001 ..... ..... 101 ..... 0110011 @r
+rem 0000001 ..... ..... 110 ..... 0110011 @r
+remu 0000001 ..... ..... 111 ..... 0110011 @r
+
+# *** RV32A Standard Extension ***
+lr_w 00010 . . 00000 ..... 010 ..... 0101111 @atom_ld
+sc_w 00011 . . ..... ..... 010 ..... 0101111 @atom_st
+amoswap_w 00001 . . ..... ..... 010 ..... 0101111 @atom_st
+amoadd_w 00000 . . ..... ..... 010 ..... 0101111 @atom_st
+amoxor_w 00100 . . ..... ..... 010 ..... 0101111 @atom_st
+amoand_w 01100 . . ..... ..... 010 ..... 0101111 @atom_st
+amoor_w 01000 . . ..... ..... 010 ..... 0101111 @atom_st
+amomin_w 10000 . . ..... ..... 010 ..... 0101111 @atom_st
+amomax_w 10100 . . ..... ..... 010 ..... 0101111 @atom_st
+amominu_w 11000 . . ..... ..... 010 ..... 0101111 @atom_st
+amomaxu_w 11100 . . ..... ..... 010 ..... 0101111 @atom_st
+
+# *** RV32F Standard Extension ***
+flw ............ ..... 010 ..... 0000111 @i
+fsw ....... ..... ..... 010 ..... 0100111 @s
+fmadd_s ..... 00 ..... ..... ... ..... 1000011 @r4_rm
+fmsub_s ..... 00 ..... ..... ... ..... 1000111 @r4_rm
+fnmsub_s ..... 00 ..... ..... ... ..... 1001011 @r4_rm
+fnmadd_s ..... 00 ..... ..... ... ..... 1001111 @r4_rm
+fadd_s 0000000 ..... ..... ... ..... 1010011 @r_rm
+fsub_s 0000100 ..... ..... ... ..... 1010011 @r_rm
+fmul_s 0001000 ..... ..... ... ..... 1010011 @r_rm
+fdiv_s 0001100 ..... ..... ... ..... 1010011 @r_rm
+fsqrt_s 0101100 00000 ..... ... ..... 1010011 @r2_rm
+fsgnj_s 0010000 ..... ..... 000 ..... 1010011 @r
+fsgnjn_s 0010000 ..... ..... 001 ..... 1010011 @r
+fsgnjx_s 0010000 ..... ..... 010 ..... 1010011 @r
+fmin_s 0010100 ..... ..... 000 ..... 1010011 @r
+fmax_s 0010100 ..... ..... 001 ..... 1010011 @r
+fcvt_w_s 1100000 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_wu_s 1100000 00001 ..... ... ..... 1010011 @r2_rm
+fmv_x_w 1110000 00000 ..... 000 ..... 1010011 @r2
+feq_s 1010000 ..... ..... 010 ..... 1010011 @r
+flt_s 1010000 ..... ..... 001 ..... 1010011 @r
+fle_s 1010000 ..... ..... 000 ..... 1010011 @r
+fclass_s 1110000 00000 ..... 001 ..... 1010011 @r2
+fcvt_s_w 1101000 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_s_wu 1101000 00001 ..... ... ..... 1010011 @r2_rm
+fmv_w_x 1111000 00000 ..... 000 ..... 1010011 @r2
+
+# *** RV32D Standard Extension ***
+fld ............ ..... 011 ..... 0000111 @i
+fsd ....... ..... ..... 011 ..... 0100111 @s
+fmadd_d ..... 01 ..... ..... ... ..... 1000011 @r4_rm
+fmsub_d ..... 01 ..... ..... ... ..... 1000111 @r4_rm
+fnmsub_d ..... 01 ..... ..... ... ..... 1001011 @r4_rm
+fnmadd_d ..... 01 ..... ..... ... ..... 1001111 @r4_rm
+fadd_d 0000001 ..... ..... ... ..... 1010011 @r_rm
+fsub_d 0000101 ..... ..... ... ..... 1010011 @r_rm
+fmul_d 0001001 ..... ..... ... ..... 1010011 @r_rm
+fdiv_d 0001101 ..... ..... ... ..... 1010011 @r_rm
+fsqrt_d 0101101 00000 ..... ... ..... 1010011 @r2_rm
+fsgnj_d 0010001 ..... ..... 000 ..... 1010011 @r
+fsgnjn_d 0010001 ..... ..... 001 ..... 1010011 @r
+fsgnjx_d 0010001 ..... ..... 010 ..... 1010011 @r
+fmin_d 0010101 ..... ..... 000 ..... 1010011 @r
+fmax_d 0010101 ..... ..... 001 ..... 1010011 @r
+fcvt_s_d 0100000 00001 ..... ... ..... 1010011 @r2_rm
+fcvt_d_s 0100001 00000 ..... ... ..... 1010011 @r2_rm
+feq_d 1010001 ..... ..... 010 ..... 1010011 @r
+flt_d 1010001 ..... ..... 001 ..... 1010011 @r
+fle_d 1010001 ..... ..... 000 ..... 1010011 @r
+fclass_d 1110001 00000 ..... 001 ..... 1010011 @r2
+fcvt_w_d 1100001 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_wu_d 1100001 00001 ..... ... ..... 1010011 @r2_rm
+fcvt_d_w 1101001 00000 ..... ... ..... 1010011 @r2_rm
+fcvt_d_wu 1101001 00001 ..... ... ..... 1010011 @r2_rm
diff --git a/target/riscv/insn_trans/trans_privileged.inc.c b/target/riscv/insn_trans/trans_privileged.inc.c
new file mode 100644
index 0000000000..acb605923e
--- /dev/null
+++ b/target/riscv/insn_trans/trans_privileged.inc.c
@@ -0,0 +1,110 @@
+/*
+ * RISC-V translation routines for the RISC-V privileged instructions.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_ecall(DisasContext *ctx, arg_ecall *a)
+{
+    /*
+     * Always generate a U-level ECALL; the do_interrupt handler
+     * remaps it to the current privilege level.
+     */
+ generate_exception(ctx, RISCV_EXCP_U_ECALL);
+ tcg_gen_exit_tb(NULL, 0); /* no chaining */
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
+static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
+{
+ generate_exception(ctx, RISCV_EXCP_BREAKPOINT);
+ tcg_gen_exit_tb(NULL, 0); /* no chaining */
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
+static bool trans_uret(DisasContext *ctx, arg_uret *a)
+{
+ return false;
+}
+
+static bool trans_sret(DisasContext *ctx, arg_sret *a)
+{
+#ifndef CONFIG_USER_ONLY
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
+
+ if (has_ext(ctx, RVS)) {
+ gen_helper_sret(cpu_pc, cpu_env, cpu_pc);
+ tcg_gen_exit_tb(NULL, 0); /* no chaining */
+ ctx->base.is_jmp = DISAS_NORETURN;
+ } else {
+ return false;
+ }
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool trans_hret(DisasContext *ctx, arg_hret *a)
+{
+ return false;
+}
+
+static bool trans_mret(DisasContext *ctx, arg_mret *a)
+{
+#ifndef CONFIG_USER_ONLY
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
+ gen_helper_mret(cpu_pc, cpu_env, cpu_pc);
+ tcg_gen_exit_tb(NULL, 0); /* no chaining */
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool trans_wfi(DisasContext *ctx, arg_wfi *a)
+{
+#ifndef CONFIG_USER_ONLY
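+    /* Set the PC to the next insn, where execution resumes after WFI. */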
+ tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
+ gen_helper_wfi(cpu_env);
+ return true;
+#else
+ return false;
+#endif
+}
+
+static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a)
+{
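+    /*
+     * QEMU flushes the whole TLB here; the address and ASID hints in
+     * rs1/rs2 are ignored.
+     */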
+#ifndef CONFIG_USER_ONLY
+ if (ctx->priv_ver == PRIV_VERSION_1_10_0) {
+ gen_helper_tlb_flush(cpu_env);
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool trans_sfence_vm(DisasContext *ctx, arg_sfence_vm *a)
+{
+#ifndef CONFIG_USER_ONLY
+ if (ctx->priv_ver <= PRIV_VERSION_1_09_1) {
+ gen_helper_tlb_flush(cpu_env);
+ return true;
+ }
+#endif
+ return false;
+}
diff --git a/target/riscv/insn_trans/trans_rva.inc.c b/target/riscv/insn_trans/trans_rva.inc.c
new file mode 100644
index 0000000000..f6dbbc065e
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rva.inc.c
@@ -0,0 +1,218 @@
+/*
+ * RISC-V translation routines for the RV64A Standard Extension.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static inline bool gen_lr(DisasContext *ctx, arg_atomic *a, TCGMemOp mop)
+{
+ TCGv src1 = tcg_temp_new();
+ /* Put addr in load_res, data in load_val. */
+ gen_get_gpr(src1, a->rs1);
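+    /*
+     * RL orders the reservation load after all earlier accesses, AQ
+     * orders all later accesses after it; hence one barrier on each
+     * side of the load.
+     */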
+ if (a->rl) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+ tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
+ if (a->aq) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ tcg_gen_mov_tl(load_res, src1);
+ gen_set_gpr(a->rd, load_val);
+
+ tcg_temp_free(src1);
+ return true;
+}
+
+static inline bool gen_sc(DisasContext *ctx, arg_atomic *a, TCGMemOp mop)
+{
+ TCGv src1 = tcg_temp_new();
+ TCGv src2 = tcg_temp_new();
+ TCGv dat = tcg_temp_new();
+ TCGLabel *l1 = gen_new_label();
+ TCGLabel *l2 = gen_new_label();
+
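+    /*
+     * SC succeeds only if the reserved address matches and memory still
+     * holds the value returned by LR; rd is written with 0 on success
+     * and 1 on failure, as the ISA requires.
+     */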
+ gen_get_gpr(src1, a->rs1);
+ tcg_gen_brcond_tl(TCG_COND_NE, load_res, src1, l1);
+
+ gen_get_gpr(src2, a->rs2);
+    /*
+     * Note that the TCG atomic primitives are sequentially consistent,
+     * so we can ignore AQ/RL along this path.
+     */
+ tcg_gen_atomic_cmpxchg_tl(src1, load_res, load_val, src2,
+ ctx->mem_idx, mop);
+ tcg_gen_setcond_tl(TCG_COND_NE, dat, src1, load_val);
+ gen_set_gpr(a->rd, dat);
+ tcg_gen_br(l2);
+
+ gen_set_label(l1);
+ /*
+     * The address comparison failed; however, we still need to
+     * provide the memory barrier implied by AQ/RL.
+ */
+ tcg_gen_mb(TCG_MO_ALL + a->aq * TCG_BAR_LDAQ + a->rl * TCG_BAR_STRL);
+ tcg_gen_movi_tl(dat, 1);
+ gen_set_gpr(a->rd, dat);
+
+ gen_set_label(l2);
+ tcg_temp_free(dat);
+ tcg_temp_free(src1);
+ tcg_temp_free(src2);
+ return true;
+}
+
+static bool gen_amo(DisasContext *ctx, arg_atomic *a,
+ void(*func)(TCGv, TCGv, TCGv, TCGArg, TCGMemOp),
+ TCGMemOp mop)
+{
+ TCGv src1 = tcg_temp_new();
+ TCGv src2 = tcg_temp_new();
+
+ gen_get_gpr(src1, a->rs1);
+ gen_get_gpr(src2, a->rs2);
+
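+    /*
+     * The TCG atomic helpers return the original memory value, which
+     * is what the AMO writes back to rd.
+     */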
+ (*func)(src2, src1, src2, ctx->mem_idx, mop);
+
+ gen_set_gpr(a->rd, src2);
+ tcg_temp_free(src1);
+ tcg_temp_free(src2);
+ return true;
+}
+
+static bool trans_lr_w(DisasContext *ctx, arg_lr_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_lr(ctx, a, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_sc_w(DisasContext *ctx, arg_sc_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_sc(ctx, a, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amoswap_w(DisasContext *ctx, arg_amoswap_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amoadd_w(DisasContext *ctx, arg_amoadd_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_add_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amoxor_w(DisasContext *ctx, arg_amoxor_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_xor_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amoand_w(DisasContext *ctx, arg_amoand_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_and_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amoor_w(DisasContext *ctx, arg_amoor_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_or_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amomin_w(DisasContext *ctx, arg_amomin_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_smin_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amomax_w(DisasContext *ctx, arg_amomax_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_smax_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amominu_w(DisasContext *ctx, arg_amominu_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_umin_tl, (MO_ALIGN | MO_TESL));
+}
+
+static bool trans_amomaxu_w(DisasContext *ctx, arg_amomaxu_w *a)
+{
+ REQUIRE_EXT(ctx, RVA);
+ return gen_amo(ctx, a, &tcg_gen_atomic_fetch_umax_tl, (MO_ALIGN | MO_TESL));
+}
+
+#ifdef TARGET_RISCV64
+
+static bool trans_lr_d(DisasContext *ctx, arg_lr_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_lr(ctx, a, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_sc_d(DisasContext *ctx, arg_sc_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_sc(ctx, a, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amoswap_d(DisasContext *ctx, arg_amoswap_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_xchg_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amoadd_d(DisasContext *ctx, arg_amoadd_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_add_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amoxor_d(DisasContext *ctx, arg_amoxor_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_xor_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amoand_d(DisasContext *ctx, arg_amoand_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_and_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amoor_d(DisasContext *ctx, arg_amoor_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_or_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amomin_d(DisasContext *ctx, arg_amomin_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_smin_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amomax_d(DisasContext *ctx, arg_amomax_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_smax_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amominu_d(DisasContext *ctx, arg_amominu_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_umin_tl, (MO_ALIGN | MO_TEQ));
+}
+
+static bool trans_amomaxu_d(DisasContext *ctx, arg_amomaxu_d *a)
+{
+    REQUIRE_EXT(ctx, RVA);
+    return gen_amo(ctx, a, &tcg_gen_atomic_fetch_umax_tl, (MO_ALIGN | MO_TEQ));
+}
+#endif
diff --git a/target/riscv/insn_trans/trans_rvc.inc.c b/target/riscv/insn_trans/trans_rvc.inc.c
new file mode 100644
index 0000000000..bcdf64d3b7
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvc.inc.c
@@ -0,0 +1,327 @@
+/*
+ * RISC-V translation routines for the RVC Compressed Instruction Set.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_c_addi4spn(DisasContext *ctx, arg_c_addi4spn *a)
+{
+ if (a->nzuimm == 0) {
+ /* Reserved in ISA */
+ return false;
+ }
+ arg_addi arg = { .rd = a->rd, .rs1 = 2, .imm = a->nzuimm };
+ return trans_addi(ctx, &arg);
+}
+
+static bool trans_c_fld(DisasContext *ctx, arg_c_fld *a)
+{
+ arg_fld arg = { .rd = a->rd, .rs1 = a->rs1, .imm = a->uimm };
+ return trans_fld(ctx, &arg);
+}
+
+static bool trans_c_lw(DisasContext *ctx, arg_c_lw *a)
+{
+ arg_lw arg = { .rd = a->rd, .rs1 = a->rs1, .imm = a->uimm };
+ return trans_lw(ctx, &arg);
+}
+
+static bool trans_c_flw_ld(DisasContext *ctx, arg_c_flw_ld *a)
+{
+#ifdef TARGET_RISCV32
+ /* C.FLW ( RV32FC-only ) */
+ return false;
+#else
+ /* C.LD ( RV64C/RV128C-only ) */
+ return false;
+#endif
+}
+
+static bool trans_c_fsd(DisasContext *ctx, arg_c_fsd *a)
+{
+ arg_fsd arg = { .rs1 = a->rs1, .rs2 = a->rs2, .imm = a->uimm };
+ return trans_fsd(ctx, &arg);
+}
+
+static bool trans_c_sw(DisasContext *ctx, arg_c_sw *a)
+{
+ arg_sw arg = { .rs1 = a->rs1, .rs2 = a->rs2, .imm = a->uimm };
+ return trans_sw(ctx, &arg);
+}
+
+static bool trans_c_fsw_sd(DisasContext *ctx, arg_c_fsw_sd *a)
+{
+#ifdef TARGET_RISCV32
+ /* C.FSW ( RV32FC-only ) */
+ return false;
+#else
+ /* C.SD ( RV64C/RV128C-only ) */
+ return false;
+#endif
+}
+
+static bool trans_c_addi(DisasContext *ctx, arg_c_addi *a)
+{
+ if (a->imm == 0) {
+ /* Hint: insn is valid but does not affect state */
+ return true;
+ }
+ arg_addi arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
+ return trans_addi(ctx, &arg);
+}
+
+static bool trans_c_jal_addiw(DisasContext *ctx, arg_c_jal_addiw *a)
+{
+#ifdef TARGET_RISCV32
+ /* C.JAL */
+ arg_jal arg = { .rd = 1, .imm = a->imm };
+ return trans_jal(ctx, &arg);
+#else
+ /* C.ADDIW */
+ arg_addiw arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
+ return trans_addiw(ctx, &arg);
+#endif
+}
+
+static bool trans_c_li(DisasContext *ctx, arg_c_li *a)
+{
+ if (a->rd == 0) {
+ /* Hint: insn is valid but does not affect state */
+ return true;
+ }
+ arg_addi arg = { .rd = a->rd, .rs1 = 0, .imm = a->imm };
+ return trans_addi(ctx, &arg);
+}
+
+static bool trans_c_addi16sp_lui(DisasContext *ctx, arg_c_addi16sp_lui *a)
+{
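+    /* C.ADDI16SP and C.LUI share an encoding; rd == 2 selects C.ADDI16SP. */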
+ if (a->rd == 2) {
+ /* C.ADDI16SP */
+ arg_addi arg = { .rd = 2, .rs1 = 2, .imm = a->imm_addi16sp };
+ return trans_addi(ctx, &arg);
+ } else if (a->imm_lui != 0) {
+ /* C.LUI */
+ if (a->rd == 0) {
+ /* Hint: insn is valid but does not affect state */
+ return true;
+ }
+ arg_lui arg = { .rd = a->rd, .imm = a->imm_lui };
+ return trans_lui(ctx, &arg);
+ }
+ return false;
+}
+
+static bool trans_c_srli(DisasContext *ctx, arg_c_srli *a)
+{
+ int shamt = a->shamt;
+ if (shamt == 0) {
+ /* For RV128 a shamt of 0 means a shift by 64 */
+ shamt = 64;
+ }
+    /* Ensure that shamt[5] is zero for RV32 */
+    if (shamt >= TARGET_LONG_BITS) {
+        return false;
+    }
+
+    arg_srli arg = { .rd = a->rd, .rs1 = a->rd, .shamt = shamt };
+ return trans_srli(ctx, &arg);
+}
+
+static bool trans_c_srai(DisasContext *ctx, arg_c_srai *a)
+{
+ int shamt = a->shamt;
+ if (shamt == 0) {
+ /* For RV128 a shamt of 0 means a shift by 64 */
+ shamt = 64;
+ }
+    /* Ensure that shamt[5] is zero for RV32 */
+    if (shamt >= TARGET_LONG_BITS) {
+        return false;
+    }
+
+    arg_srai arg = { .rd = a->rd, .rs1 = a->rd, .shamt = shamt };
+ return trans_srai(ctx, &arg);
+}
+
+static bool trans_c_andi(DisasContext *ctx, arg_c_andi *a)
+{
+ arg_andi arg = { .rd = a->rd, .rs1 = a->rd, .imm = a->imm };
+ return trans_andi(ctx, &arg);
+}
+
+static bool trans_c_sub(DisasContext *ctx, arg_c_sub *a)
+{
+ arg_sub arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_sub(ctx, &arg);
+}
+
+static bool trans_c_xor(DisasContext *ctx, arg_c_xor *a)
+{
+ arg_xor arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_xor(ctx, &arg);
+}
+
+static bool trans_c_or(DisasContext *ctx, arg_c_or *a)
+{
+ arg_or arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_or(ctx, &arg);
+}
+
+static bool trans_c_and(DisasContext *ctx, arg_c_and *a)
+{
+ arg_and arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_and(ctx, &arg);
+}
+
+static bool trans_c_subw(DisasContext *ctx, arg_c_subw *a)
+{
+#ifdef TARGET_RISCV64
+ arg_subw arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_subw(ctx, &arg);
+#else
+ return false;
+#endif
+}
+
+static bool trans_c_addw(DisasContext *ctx, arg_c_addw *a)
+{
+#ifdef TARGET_RISCV64
+ arg_addw arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_addw(ctx, &arg);
+#else
+ return false;
+#endif
+}
+
+static bool trans_c_j(DisasContext *ctx, arg_c_j *a)
+{
+ arg_jal arg = { .rd = 0, .imm = a->imm };
+ return trans_jal(ctx, &arg);
+}
+
+static bool trans_c_beqz(DisasContext *ctx, arg_c_beqz *a)
+{
+ arg_beq arg = { .rs1 = a->rs1, .rs2 = 0, .imm = a->imm };
+ return trans_beq(ctx, &arg);
+}
+
+static bool trans_c_bnez(DisasContext *ctx, arg_c_bnez *a)
+{
+ arg_bne arg = { .rs1 = a->rs1, .rs2 = 0, .imm = a->imm };
+ return trans_bne(ctx, &arg);
+}
+
+static bool trans_c_slli(DisasContext *ctx, arg_c_slli *a)
+{
+ int shamt = a->shamt;
+ if (shamt == 0) {
+ /* For RV128 a shamt of 0 means a shift by 64 */
+ shamt = 64;
+ }
+    /* Ensure that shamt[5] is zero for RV32 */
+    if (shamt >= TARGET_LONG_BITS) {
+        return false;
+    }
+
+    arg_slli arg = { .rd = a->rd, .rs1 = a->rd, .shamt = shamt };
+ return trans_slli(ctx, &arg);
+}
+
+static bool trans_c_fldsp(DisasContext *ctx, arg_c_fldsp *a)
+{
+ arg_fld arg = { .rd = a->rd, .rs1 = 2, .imm = a->uimm };
+ return trans_fld(ctx, &arg);
+}
+
+static bool trans_c_lwsp(DisasContext *ctx, arg_c_lwsp *a)
+{
+ arg_lw arg = { .rd = a->rd, .rs1 = 2, .imm = a->uimm };
+ return trans_lw(ctx, &arg);
+}
+
+static bool trans_c_flwsp_ldsp(DisasContext *ctx, arg_c_flwsp_ldsp *a)
+{
+#ifdef TARGET_RISCV32
+ /* C.FLWSP */
+ arg_flw arg_flw = { .rd = a->rd, .rs1 = 2, .imm = a->uimm_flwsp };
+ return trans_flw(ctx, &arg_flw);
+#else
+ /* C.LDSP */
+ arg_ld arg_ld = { .rd = a->rd, .rs1 = 2, .imm = a->uimm_ldsp };
+ return trans_ld(ctx, &arg_ld);
+#endif
+}
+
+static bool trans_c_jr_mv(DisasContext *ctx, arg_c_jr_mv *a)
+{
+ if (a->rd != 0 && a->rs2 == 0) {
+ /* C.JR */
+ arg_jalr arg = { .rd = 0, .rs1 = a->rd, .imm = 0 };
+ return trans_jalr(ctx, &arg);
+ } else if (a->rd != 0 && a->rs2 != 0) {
+ /* C.MV */
+ arg_add arg = { .rd = a->rd, .rs1 = 0, .rs2 = a->rs2 };
+ return trans_add(ctx, &arg);
+ }
+ return false;
+}
+
+static bool trans_c_ebreak_jalr_add(DisasContext *ctx, arg_c_ebreak_jalr_add *a)
+{
+ if (a->rd == 0 && a->rs2 == 0) {
+ /* C.EBREAK */
+ arg_ebreak arg = { };
+ return trans_ebreak(ctx, &arg);
+ } else if (a->rd != 0) {
+ if (a->rs2 == 0) {
+ /* C.JALR */
+ arg_jalr arg = { .rd = 1, .rs1 = a->rd, .imm = 0 };
+ return trans_jalr(ctx, &arg);
+ } else {
+ /* C.ADD */
+ arg_add arg = { .rd = a->rd, .rs1 = a->rd, .rs2 = a->rs2 };
+ return trans_add(ctx, &arg);
+ }
+ }
+ return false;
+}
+
+static bool trans_c_fsdsp(DisasContext *ctx, arg_c_fsdsp *a)
+{
+ arg_fsd arg = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm };
+ return trans_fsd(ctx, &arg);
+}
+
+static bool trans_c_swsp(DisasContext *ctx, arg_c_swsp *a)
+{
+ arg_sw arg = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm };
+ return trans_sw(ctx, &arg);
+}
+
+static bool trans_c_fswsp_sdsp(DisasContext *ctx, arg_c_fswsp_sdsp *a)
+{
+#ifdef TARGET_RISCV32
+ /* C.FSWSP */
+    arg_fsw a_fsw = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm_fswsp };
+ return trans_fsw(ctx, &a_fsw);
+#else
+ /* C.SDSP */
+ arg_sd a_sd = { .rs1 = 2, .rs2 = a->rs2, .imm = a->uimm_sdsp };
+ return trans_sd(ctx, &a_sd);
+#endif
+}
diff --git a/target/riscv/insn_trans/trans_rvd.inc.c b/target/riscv/insn_trans/trans_rvd.inc.c
new file mode 100644
index 0000000000..393fa0248c
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvd.inc.c
@@ -0,0 +1,442 @@
+/*
+ * RISC-V translation routines for the RV64D Standard Extension.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_fld(DisasContext *ctx, arg_fld *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+ tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEQ);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVD);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+ tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEQ);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmsub_d(DisasContext *ctx, arg_fmsub_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmsub_d(DisasContext *ctx, arg_fnmsub_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmsub_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmadd_d(DisasContext *ctx, arg_fnmadd_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmadd_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fadd_d(DisasContext *ctx, arg_fadd_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fadd_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsub_d(DisasContext *ctx, arg_fsub_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsub_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmul_d(DisasContext *ctx, arg_fmul_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmul_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fdiv_d(DisasContext *ctx, arg_fdiv_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fdiv_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsqrt_d(DisasContext *ctx, arg_fsqrt_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsqrt_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnj_d(DisasContext *ctx, arg_fsgnj_d *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVD);
+    if (a->rs1 == a->rs2) { /* FMOV */
+ tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ } else {
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],
+ cpu_fpr[a->rs1], 0, 63);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjn_d(DisasContext *ctx, arg_fsgnjn_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ if (a->rs1 == a->rs2) { /* FNEG */
+ tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT64_MIN);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 63);
+ tcg_temp_free_i64(t0);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjx_d(DisasContext *ctx, arg_fsgnjx_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+ if (a->rs1 == a->rs2) { /* FABS */
+ tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT64_MIN);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT64_MIN);
+ tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
+ tcg_temp_free_i64(t0);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmin_d(DisasContext *ctx, arg_fmin_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_helper_fmin_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmax_d(DisasContext *ctx, arg_fmax_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_helper_fmax_d(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_s_d(DisasContext *ctx, arg_fcvt_s_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_d(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_d_s(DisasContext *ctx, arg_fcvt_d_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_feq_d(DisasContext *ctx, arg_feq_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_helper_feq_d(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_flt_d(DisasContext *ctx, arg_flt_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_helper_flt_d(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fle_d(DisasContext *ctx, arg_fle_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_helper_fle_d(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fclass_d(DisasContext *ctx, arg_fclass_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_helper_fclass_d(t0, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fcvt_w_d(DisasContext *ctx, arg_fcvt_w_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_w_d(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fcvt_wu_d(DisasContext *ctx, arg_fcvt_wu_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_wu_d(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fcvt_d_w(DisasContext *ctx, arg_fcvt_d_w *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_w(cpu_fpr[a->rd], cpu_env, t0);
+ tcg_temp_free(t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_d_wu(DisasContext *ctx, arg_fcvt_d_wu *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_wu(cpu_fpr[a->rd], cpu_env, t0);
+ tcg_temp_free(t0);
+
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+#ifdef TARGET_RISCV64
+
+static bool trans_fcvt_l_d(DisasContext *ctx, arg_fcvt_l_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_l_d(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fcvt_lu_d(DisasContext *ctx, arg_fcvt_lu_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_lu_d(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fmv_x_d(DisasContext *ctx, arg_fmv_x_d *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ gen_set_gpr(a->rd, cpu_fpr[a->rs1]);
+ return true;
+}
+
+static bool trans_fcvt_d_l(DisasContext *ctx, arg_fcvt_d_l *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_l(cpu_fpr[a->rd], cpu_env, t0);
+ tcg_temp_free(t0);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_d_lu(DisasContext *ctx, arg_fcvt_d_lu *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_d_lu(cpu_fpr[a->rd], cpu_env, t0);
+ tcg_temp_free(t0);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmv_d_x(DisasContext *ctx, arg_fmv_d_x *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVD);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ tcg_gen_mov_tl(cpu_fpr[a->rd], t0);
+ tcg_temp_free(t0);
+ mark_fs_dirty(ctx);
+ return true;
+}
+#endif
diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
new file mode 100644
index 0000000000..172dbfa919
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -0,0 +1,439 @@
+/*
+ * RISC-V translation routines for the RV64F Standard Extension.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
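+/* Floating point insns must trap when mstatus.FS is 0 (FPU disabled). */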
+#define REQUIRE_FPU do {\
+    if (ctx->mstatus_fs == 0) { \
+        return false; \
+    } \
+} while (0)
+
+static bool trans_flw(DisasContext *ctx, arg_flw *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+ tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], t0, ctx->mem_idx, MO_TEUL);
+ /* RISC-V requires NaN-boxing of narrower width floating point values */
+ tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd], 0xffffffff00000000ULL);
+
+ tcg_temp_free(t0);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
+{
+    REQUIRE_FPU;
+    REQUIRE_EXT(ctx, RVF);
+
+    TCGv t0 = tcg_temp_new();
+    gen_get_gpr(t0, a->rs1);
+    tcg_gen_addi_tl(t0, t0, a->imm);
+
+ tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], t0, ctx->mem_idx, MO_TEUL);
+
+ tcg_temp_free(t0);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmsub_s(DisasContext *ctx, arg_fmsub_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmsub_s(DisasContext *ctx, arg_fnmsub_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmsub_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fnmadd_s(DisasContext *ctx, arg_fnmadd_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fnmadd_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2], cpu_fpr[a->rs3]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fadd_s(DisasContext *ctx, arg_fadd_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fadd_s(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsub_s(DisasContext *ctx, arg_fsub_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsub_s(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmul_s(DisasContext *ctx, arg_fmul_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fmul_s(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fdiv_s(DisasContext *ctx, arg_fdiv_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fdiv_s(cpu_fpr[a->rd], cpu_env,
+ cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsqrt_s(DisasContext *ctx, arg_fsqrt_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fsqrt_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ if (a->rs1 == a->rs2) { /* FMOV */
+ tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+ } else { /* FSGNJ */
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
+ 0, 31);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ if (a->rs1 == a->rs2) { /* FNEG */
+ tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
+ tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
+ tcg_temp_free_i64(t0);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ if (a->rs1 == a->rs2) { /* FABS */
+ tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
+ tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
+ tcg_temp_free_i64(t0);
+ }
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmin_s(DisasContext *ctx, arg_fmin_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_helper_fmin_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fmax_s(DisasContext *ctx, arg_fmax_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ gen_helper_fmax_s(cpu_fpr[a->rd], cpu_env, cpu_fpr[a->rs1],
+ cpu_fpr[a->rs2]);
+ mark_fs_dirty(ctx);
+ return true;
+}
+
+static bool trans_fcvt_w_s(DisasContext *ctx, arg_fcvt_w_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_w_s(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fcvt_wu_s(DisasContext *ctx, arg_fcvt_wu_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_wu_s(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fmv_x_w(DisasContext *ctx, arg_fmv_x_w *a)
+{
+ /* NOTE: This was FMV.X.S in an earlier version of the ISA spec! */
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+
+#if defined(TARGET_RISCV64)
+ tcg_gen_ext32s_tl(t0, cpu_fpr[a->rs1]);
+#else
+ tcg_gen_extrl_i64_i32(t0, cpu_fpr[a->rs1]);
+#endif
+
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_feq_s(DisasContext *ctx, arg_feq_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ TCGv t0 = tcg_temp_new();
+ gen_helper_feq_s(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_flt_s(DisasContext *ctx, arg_flt_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ TCGv t0 = tcg_temp_new();
+ gen_helper_flt_s(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fle_s(DisasContext *ctx, arg_fle_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+ TCGv t0 = tcg_temp_new();
+ gen_helper_fle_s(t0, cpu_env, cpu_fpr[a->rs1], cpu_fpr[a->rs2]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fclass_s(DisasContext *ctx, arg_fclass_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+
+ gen_helper_fclass_s(t0, cpu_fpr[a->rs1]);
+
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fcvt_s_w(DisasContext *ctx, arg_fcvt_s_w *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_w(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fcvt_s_wu(DisasContext *ctx, arg_fcvt_s_wu *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_wu(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+static bool trans_fmv_w_x(DisasContext *ctx, arg_fmv_w_x *a)
+{
+ /* NOTE: This was FMV.S.X in an earlier version of the ISA spec! */
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+#if defined(TARGET_RISCV64)
+ tcg_gen_mov_i64(cpu_fpr[a->rd], t0);
+#else
+ tcg_gen_extu_i32_i64(cpu_fpr[a->rd], t0);
+#endif
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+
+ return true;
+}
+
+#ifdef TARGET_RISCV64
+static bool trans_fcvt_l_s(DisasContext *ctx, arg_fcvt_l_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_l_s(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fcvt_lu_s(DisasContext *ctx, arg_fcvt_lu_s *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_lu_s(t0, cpu_env, cpu_fpr[a->rs1]);
+ gen_set_gpr(a->rd, t0);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fcvt_s_l(DisasContext *ctx, arg_fcvt_s_l *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_l(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool trans_fcvt_s_lu(DisasContext *ctx, arg_fcvt_s_lu *a)
+{
+ REQUIRE_FPU;
+ REQUIRE_EXT(ctx, RVF);
+
+ TCGv t0 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+
+ gen_set_rm(ctx, a->rm);
+ gen_helper_fcvt_s_lu(cpu_fpr[a->rd], cpu_env, t0);
+
+ mark_fs_dirty(ctx);
+ tcg_temp_free(t0);
+ return true;
+}
+#endif
diff --git a/target/riscv/insn_trans/trans_rvi.inc.c b/target/riscv/insn_trans/trans_rvi.inc.c
new file mode 100644
index 0000000000..d420a4d8b2
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvi.inc.c
@@ -0,0 +1,568 @@
+/*
+ * RISC-V translation routines for the RVXI Base Integer Instruction Set.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_lui(DisasContext *ctx, arg_lui *a)
+{
+ if (a->rd != 0) {
+ tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm);
+ }
+ return true;
+}
+
+static bool trans_auipc(DisasContext *ctx, arg_auipc *a)
+{
+ if (a->rd != 0) {
+ tcg_gen_movi_tl(cpu_gpr[a->rd], a->imm + ctx->base.pc_next);
+ }
+ return true;
+}
+
+static bool trans_jal(DisasContext *ctx, arg_jal *a)
+{
+ gen_jal(ctx, a->rd, a->imm);
+ return true;
+}
+
+static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
+{
+ /* no chaining with JALR */
+ TCGLabel *misaligned = NULL;
+ TCGv t0 = tcg_temp_new();
+
+ gen_get_gpr(cpu_pc, a->rs1);
+ tcg_gen_addi_tl(cpu_pc, cpu_pc, a->imm);
+ tcg_gen_andi_tl(cpu_pc, cpu_pc, (target_ulong)-2);
+
+ if (!has_ext(ctx, RVC)) {
+ misaligned = gen_new_label();
+ tcg_gen_andi_tl(t0, cpu_pc, 0x2);
+ tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
+ }
+
+ if (a->rd != 0) {
+ tcg_gen_movi_tl(cpu_gpr[a->rd], ctx->pc_succ_insn);
+ }
+ tcg_gen_lookup_and_goto_ptr();
+
+ if (misaligned) {
+ gen_set_label(misaligned);
+ gen_exception_inst_addr_mis(ctx);
+ }
+ ctx->base.is_jmp = DISAS_NORETURN;
+
+ tcg_temp_free(t0);
+ return true;
+}
+
+static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond cond)
+{
+ TCGLabel *l = gen_new_label();
+ TCGv source1, source2;
+ source1 = tcg_temp_new();
+ source2 = tcg_temp_new();
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ tcg_gen_brcond_tl(cond, source1, source2, l);
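+    /* Branch not taken: continue at the next instruction. */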
+ gen_goto_tb(ctx, 1, ctx->pc_succ_insn);
+ gen_set_label(l); /* branch taken */
+
+ if (!has_ext(ctx, RVC) && ((ctx->base.pc_next + a->imm) & 0x3)) {
+ /* misaligned */
+ gen_exception_inst_addr_mis(ctx);
+ } else {
+ gen_goto_tb(ctx, 0, ctx->base.pc_next + a->imm);
+ }
+ ctx->base.is_jmp = DISAS_NORETURN;
+
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+
+ return true;
+}
+
+static bool trans_beq(DisasContext *ctx, arg_beq *a)
+{
+ return gen_branch(ctx, a, TCG_COND_EQ);
+}
+
+static bool trans_bne(DisasContext *ctx, arg_bne *a)
+{
+ return gen_branch(ctx, a, TCG_COND_NE);
+}
+
+static bool trans_blt(DisasContext *ctx, arg_blt *a)
+{
+ return gen_branch(ctx, a, TCG_COND_LT);
+}
+
+static bool trans_bge(DisasContext *ctx, arg_bge *a)
+{
+ return gen_branch(ctx, a, TCG_COND_GE);
+}
+
+static bool trans_bltu(DisasContext *ctx, arg_bltu *a)
+{
+ return gen_branch(ctx, a, TCG_COND_LTU);
+}
+
+static bool trans_bgeu(DisasContext *ctx, arg_bgeu *a)
+{
+ return gen_branch(ctx, a, TCG_COND_GEU);
+}
+
+static bool gen_load(DisasContext *ctx, arg_lb *a, TCGMemOp memop)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv t1 = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+ tcg_gen_addi_tl(t0, t0, a->imm);
+
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, memop);
+ gen_set_gpr(a->rd, t1);
+ tcg_temp_free(t0);
+ tcg_temp_free(t1);
+ return true;
+}
+
+static bool trans_lb(DisasContext *ctx, arg_lb *a)
+{
+ return gen_load(ctx, a, MO_SB);
+}
+
+static bool trans_lh(DisasContext *ctx, arg_lh *a)
+{
+ return gen_load(ctx, a, MO_TESW);
+}
+
+static bool trans_lw(DisasContext *ctx, arg_lw *a)
+{
+ return gen_load(ctx, a, MO_TESL);
+}
+
+static bool trans_lbu(DisasContext *ctx, arg_lbu *a)
+{
+ return gen_load(ctx, a, MO_UB);
+}
+
+static bool trans_lhu(DisasContext *ctx, arg_lhu *a)
+{
+ return gen_load(ctx, a, MO_TEUW);
+}
+
+static bool gen_store(DisasContext *ctx, arg_sb *a, TCGMemOp memop)
+{
+ TCGv t0 = tcg_temp_new();
+ TCGv dat = tcg_temp_new();
+ gen_get_gpr(t0, a->rs1);
+ tcg_gen_addi_tl(t0, t0, a->imm);
+ gen_get_gpr(dat, a->rs2);
+
+ tcg_gen_qemu_st_tl(dat, t0, ctx->mem_idx, memop);
+ tcg_temp_free(t0);
+ tcg_temp_free(dat);
+ return true;
+}
+
+static bool trans_sb(DisasContext *ctx, arg_sb *a)
+{
+ return gen_store(ctx, a, MO_SB);
+}
+
+static bool trans_sh(DisasContext *ctx, arg_sh *a)
+{
+ return gen_store(ctx, a, MO_TESW);
+}
+
+static bool trans_sw(DisasContext *ctx, arg_sw *a)
+{
+ return gen_store(ctx, a, MO_TESL);
+}
+
+#ifdef TARGET_RISCV64
+static bool trans_lwu(DisasContext *ctx, arg_lwu *a)
+{
+ return gen_load(ctx, a, MO_TEUL);
+}
+
+static bool trans_ld(DisasContext *ctx, arg_ld *a)
+{
+ return gen_load(ctx, a, MO_TEQ);
+}
+
+static bool trans_sd(DisasContext *ctx, arg_sd *a)
+{
+ return gen_store(ctx, a, MO_TEQ);
+}
+#endif
+
+static bool trans_addi(DisasContext *ctx, arg_addi *a)
+{
+ return gen_arith_imm(ctx, a, &tcg_gen_add_tl);
+}
+
+static void gen_slt(TCGv ret, TCGv s1, TCGv s2)
+{
+ tcg_gen_setcond_tl(TCG_COND_LT, ret, s1, s2);
+}
+
+static void gen_sltu(TCGv ret, TCGv s1, TCGv s2)
+{
+ tcg_gen_setcond_tl(TCG_COND_LTU, ret, s1, s2);
+}
+
+static bool trans_slti(DisasContext *ctx, arg_slti *a)
+{
+ return gen_arith_imm(ctx, a, &gen_slt);
+}
+
+static bool trans_sltiu(DisasContext *ctx, arg_sltiu *a)
+{
+ return gen_arith_imm(ctx, a, &gen_sltu);
+}
+
+static bool trans_xori(DisasContext *ctx, arg_xori *a)
+{
+    return gen_arith_imm(ctx, a, &tcg_gen_xor_tl);
+}
+
+static bool trans_ori(DisasContext *ctx, arg_ori *a)
+{
+    return gen_arith_imm(ctx, a, &tcg_gen_or_tl);
+}
+
+static bool trans_andi(DisasContext *ctx, arg_andi *a)
+{
+    return gen_arith_imm(ctx, a, &tcg_gen_and_tl);
+}
+
+static bool trans_slli(DisasContext *ctx, arg_slli *a)
+{
+ if (a->shamt >= TARGET_LONG_BITS) {
+ return false;
+ }
+
+ if (a->rd != 0) {
+ TCGv t = tcg_temp_new();
+ gen_get_gpr(t, a->rs1);
+
+ tcg_gen_shli_tl(t, t, a->shamt);
+
+ gen_set_gpr(a->rd, t);
+ tcg_temp_free(t);
+ } /* NOP otherwise */
+ return true;
+}
+
+static bool trans_srli(DisasContext *ctx, arg_srli *a)
+{
+ if (a->shamt >= TARGET_LONG_BITS) {
+ return false;
+ }
+
+ if (a->rd != 0) {
+ TCGv t = tcg_temp_new();
+ gen_get_gpr(t, a->rs1);
+
+ tcg_gen_shri_tl(t, t, a->shamt);
+ gen_set_gpr(a->rd, t);
+ tcg_temp_free(t);
+ } /* NOP otherwise */
+ return true;
+}
+
+static bool trans_srai(DisasContext *ctx, arg_srai *a)
+{
+ if (a->shamt >= TARGET_LONG_BITS) {
+ return false;
+ }
+
+ if (a->rd != 0) {
+ TCGv t = tcg_temp_new();
+ gen_get_gpr(t, a->rs1);
+
+ tcg_gen_sari_tl(t, t, a->shamt);
+ gen_set_gpr(a->rd, t);
+ tcg_temp_free(t);
+ } /* NOP otherwise */
+ return true;
+}
+
+static bool trans_add(DisasContext *ctx, arg_add *a)
+{
+ return gen_arith(ctx, a, &tcg_gen_add_tl);
+}
+
+static bool trans_sub(DisasContext *ctx, arg_sub *a)
+{
+ return gen_arith(ctx, a, &tcg_gen_sub_tl);
+}
+
+static bool trans_sll(DisasContext *ctx, arg_sll *a)
+{
+ return gen_shift(ctx, a, &tcg_gen_shl_tl);
+}
+
+static bool trans_slt(DisasContext *ctx, arg_slt *a)
+{
+ return gen_arith(ctx, a, &gen_slt);
+}
+
+static bool trans_sltu(DisasContext *ctx, arg_sltu *a)
+{
+ return gen_arith(ctx, a, &gen_sltu);
+}
+
+static bool trans_xor(DisasContext *ctx, arg_xor *a)
+{
+ return gen_arith(ctx, a, &tcg_gen_xor_tl);
+}
+
+static bool trans_srl(DisasContext *ctx, arg_srl *a)
+{
+ return gen_shift(ctx, a, &tcg_gen_shr_tl);
+}
+
+static bool trans_sra(DisasContext *ctx, arg_sra *a)
+{
+ return gen_shift(ctx, a, &tcg_gen_sar_tl);
+}
+
+static bool trans_or(DisasContext *ctx, arg_or *a)
+{
+ return gen_arith(ctx, a, &tcg_gen_or_tl);
+}
+
+static bool trans_and(DisasContext *ctx, arg_and *a)
+{
+ return gen_arith(ctx, a, &tcg_gen_and_tl);
+}
+
+#ifdef TARGET_RISCV64
+static bool trans_addiw(DisasContext *ctx, arg_addiw *a)
+{
+ return gen_arith_imm(ctx, a, &gen_addw);
+}
+
+static bool trans_slliw(DisasContext *ctx, arg_slliw *a)
+{
+ TCGv source1;
+ source1 = tcg_temp_new();
+ gen_get_gpr(source1, a->rs1);
+
+ tcg_gen_shli_tl(source1, source1, a->shamt);
+ tcg_gen_ext32s_tl(source1, source1);
+ gen_set_gpr(a->rd, source1);
+
+ tcg_temp_free(source1);
+ return true;
+}
+
+static bool trans_srliw(DisasContext *ctx, arg_srliw *a)
+{
+ TCGv t = tcg_temp_new();
+ gen_get_gpr(t, a->rs1);
+ tcg_gen_extract_tl(t, t, a->shamt, 32 - a->shamt);
+ /* sign-extend for W instructions */
+ tcg_gen_ext32s_tl(t, t);
+ gen_set_gpr(a->rd, t);
+ tcg_temp_free(t);
+ return true;
+}
+
+static bool trans_sraiw(DisasContext *ctx, arg_sraiw *a)
+{
+ TCGv t = tcg_temp_new();
+ gen_get_gpr(t, a->rs1);
+ tcg_gen_sextract_tl(t, t, a->shamt, 32 - a->shamt);
+ gen_set_gpr(a->rd, t);
+ tcg_temp_free(t);
+ return true;
+}
+
+static bool trans_addw(DisasContext *ctx, arg_addw *a)
+{
+ return gen_arith(ctx, a, &gen_addw);
+}
+
+static bool trans_subw(DisasContext *ctx, arg_subw *a)
+{
+ return gen_arith(ctx, a, &gen_subw);
+}
+
+static bool trans_sllw(DisasContext *ctx, arg_sllw *a)
+{
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ tcg_gen_shl_tl(source1, source1, source2);
+
+ tcg_gen_ext32s_tl(source1, source1);
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
+
+static bool trans_srlw(DisasContext *ctx, arg_srlw *a)
+{
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ /* clear upper 32 */
+ tcg_gen_ext32u_tl(source1, source1);
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ tcg_gen_shr_tl(source1, source1, source2);
+
+ tcg_gen_ext32s_tl(source1, source1);
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
+
+static bool trans_sraw(DisasContext *ctx, arg_sraw *a)
+{
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+    /*
+     * First make the operand behave like a 32-bit value: discard the
+     * upper 32 bits and sign-extend to fill the space.
+     */
+ tcg_gen_ext32s_tl(source1, source1);
+ tcg_gen_andi_tl(source2, source2, 0x1F);
+ tcg_gen_sar_tl(source1, source1, source2);
+
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+
+ return true;
+}
+#endif
+
+static bool trans_fence(DisasContext *ctx, arg_fence *a)
+{
+ /* FENCE is a full memory barrier. */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ return true;
+}
+
+static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a)
+{
+    /*
+     * FENCE.I is a no-op in QEMU, but we still end the translation
+     * block so that newly written code gets retranslated.
+     */
+ tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
+ tcg_gen_exit_tb(NULL, 0);
+ ctx->base.is_jmp = DISAS_NORETURN;
+ return true;
+}
+
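+/*
+ * A CSR access may change interrupt state or address translation, so
+ * the helpers run with cpu_pc synchronized and the translation block
+ * always ends afterwards.
+ */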
+#define RISCV_OP_CSR_PRE do {\
+ source1 = tcg_temp_new(); \
+ csr_store = tcg_temp_new(); \
+ dest = tcg_temp_new(); \
+ rs1_pass = tcg_temp_new(); \
+ gen_get_gpr(source1, a->rs1); \
+ tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next); \
+ tcg_gen_movi_tl(rs1_pass, a->rs1); \
+ tcg_gen_movi_tl(csr_store, a->csr); \
+ gen_io_start();\
+} while (0)
+
+#define RISCV_OP_CSR_POST do {\
+ gen_io_end(); \
+ gen_set_gpr(a->rd, dest); \
+ tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn); \
+ tcg_gen_exit_tb(NULL, 0); \
+ ctx->base.is_jmp = DISAS_NORETURN; \
+ tcg_temp_free(source1); \
+ tcg_temp_free(csr_store); \
+ tcg_temp_free(dest); \
+ tcg_temp_free(rs1_pass); \
+} while (0)
+
+static bool trans_csrrw(DisasContext *ctx, arg_csrrw *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrw(dest, cpu_env, source1, csr_store);
+ RISCV_OP_CSR_POST;
+ return true;
+}
+
+static bool trans_csrrs(DisasContext *ctx, arg_csrrs *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrs(dest, cpu_env, source1, csr_store, rs1_pass);
+ RISCV_OP_CSR_POST;
+ return true;
+}
+
+static bool trans_csrrc(DisasContext *ctx, arg_csrrc *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrc(dest, cpu_env, source1, csr_store, rs1_pass);
+ RISCV_OP_CSR_POST;
+ return true;
+}
+
+static bool trans_csrrwi(DisasContext *ctx, arg_csrrwi *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrw(dest, cpu_env, rs1_pass, csr_store);
+ RISCV_OP_CSR_POST;
+ return true;
+}
+
+static bool trans_csrrsi(DisasContext *ctx, arg_csrrsi *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrs(dest, cpu_env, rs1_pass, csr_store, rs1_pass);
+ RISCV_OP_CSR_POST;
+ return true;
+}
+
+static bool trans_csrrci(DisasContext *ctx, arg_csrrci *a)
+{
+ TCGv source1, csr_store, dest, rs1_pass;
+ RISCV_OP_CSR_PRE;
+ gen_helper_csrrc(dest, cpu_env, rs1_pass, csr_store, rs1_pass);
+ RISCV_OP_CSR_POST;
+ return true;
+}
diff --git a/target/riscv/insn_trans/trans_rvm.inc.c b/target/riscv/insn_trans/trans_rvm.inc.c
new file mode 100644
index 0000000000..204af225f8
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvm.inc.c
@@ -0,0 +1,120 @@
+/*
+ * RISC-V translation routines for the RV64M Standard Extension.
+ *
+ * Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
+ * Copyright (c) 2018 Peer Adelt, peer.adelt@hni.uni-paderborn.de
+ * Bastian Koppelmann, kbastian@mail.uni-paderborn.de
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool trans_mul(DisasContext *ctx, arg_mul *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &tcg_gen_mul_tl);
+}
+
+static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
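+    /* muls2 yields the double-width product; rd takes the high half. */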
+ tcg_gen_muls2_tl(source2, source1, source1, source2);
+
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
+
+static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_mulhsu);
+}
+
+static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ tcg_gen_mulu2_tl(source2, source1, source1, source2);
+
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
+
+static bool trans_div(DisasContext *ctx, arg_div *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_div);
+}
+
+static bool trans_divu(DisasContext *ctx, arg_divu *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_divu);
+}
+
+static bool trans_rem(DisasContext *ctx, arg_rem *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_rem);
+}
+
+static bool trans_remu(DisasContext *ctx, arg_remu *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_remu);
+}
+
+#ifdef TARGET_RISCV64
+static bool trans_mulw(DisasContext *ctx, arg_mulw *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith(ctx, a, &gen_mulw);
+}
+
+static bool trans_divw(DisasContext *ctx, arg_divw *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith_div_w(ctx, a, &gen_div);
+}
+
+static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith_div_w(ctx, a, &gen_divu);
+}
+
+static bool trans_remw(DisasContext *ctx, arg_remw *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith_div_w(ctx, a, &gen_rem);
+}
+
+static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
+{
+ REQUIRE_EXT(ctx, RVM);
+ return gen_arith_div_w(ctx, a, &gen_remu);
+}
+#endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index b7176cbf98..049fa65c66 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -56,6 +56,7 @@ typedef struct DisasContext {
int frm;
} DisasContext;
+#ifdef TARGET_RISCV64
/* convert riscv funct3 to qemu memop for load/store */
static const int tcg_memop_lookup[8] = {
[0 ... 7] = -1,
@@ -69,6 +70,7 @@ static const int tcg_memop_lookup[8] = {
[6] = MO_TEUL,
#endif
};
+#endif
#ifdef TARGET_RISCV64
#define CASE_OP_32_64(X) case X: case glue(X, W)
@@ -186,367 +188,112 @@ static void gen_mulhsu(TCGv ret, TCGv arg1, TCGv arg2)
tcg_temp_free(rh);
}
-static void gen_fsgnj(DisasContext *ctx, uint32_t rd, uint32_t rs1,
- uint32_t rs2, int rm, uint64_t min)
-{
- switch (rm) {
- case 0: /* fsgnj */
- if (rs1 == rs2) { /* FMOV */
- tcg_gen_mov_i64(cpu_fpr[rd], cpu_fpr[rs1]);
- } else {
- tcg_gen_deposit_i64(cpu_fpr[rd], cpu_fpr[rs2], cpu_fpr[rs1],
- 0, min == INT32_MIN ? 31 : 63);
- }
- break;
- case 1: /* fsgnjn */
- if (rs1 == rs2) { /* FNEG */
- tcg_gen_xori_i64(cpu_fpr[rd], cpu_fpr[rs1], min);
- } else {
- TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_not_i64(t0, cpu_fpr[rs2]);
- tcg_gen_deposit_i64(cpu_fpr[rd], t0, cpu_fpr[rs1],
- 0, min == INT32_MIN ? 31 : 63);
- tcg_temp_free_i64(t0);
- }
- break;
- case 2: /* fsgnjx */
- if (rs1 == rs2) { /* FABS */
- tcg_gen_andi_i64(cpu_fpr[rd], cpu_fpr[rs1], ~min);
- } else {
- TCGv_i64 t0 = tcg_temp_new_i64();
- tcg_gen_andi_i64(t0, cpu_fpr[rs2], min);
- tcg_gen_xor_i64(cpu_fpr[rd], cpu_fpr[rs1], t0);
- tcg_temp_free_i64(t0);
- }
- break;
- default:
- gen_exception_illegal(ctx);
- }
-}
-
-static void gen_arith(DisasContext *ctx, uint32_t opc, int rd, int rs1,
- int rs2)
-{
- TCGv source1, source2, cond1, cond2, zeroreg, resultopt1;
- source1 = tcg_temp_new();
- source2 = tcg_temp_new();
- gen_get_gpr(source1, rs1);
- gen_get_gpr(source2, rs2);
-
- switch (opc) {
- CASE_OP_32_64(OPC_RISC_ADD):
- tcg_gen_add_tl(source1, source1, source2);
- break;
- CASE_OP_32_64(OPC_RISC_SUB):
- tcg_gen_sub_tl(source1, source1, source2);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_SLLW:
- tcg_gen_andi_tl(source2, source2, 0x1F);
- tcg_gen_shl_tl(source1, source1, source2);
- break;
-#endif
- case OPC_RISC_SLL:
- tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
- tcg_gen_shl_tl(source1, source1, source2);
- break;
- case OPC_RISC_SLT:
- tcg_gen_setcond_tl(TCG_COND_LT, source1, source1, source2);
- break;
- case OPC_RISC_SLTU:
- tcg_gen_setcond_tl(TCG_COND_LTU, source1, source1, source2);
- break;
- case OPC_RISC_XOR:
- tcg_gen_xor_tl(source1, source1, source2);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_SRLW:
- /* clear upper 32 */
- tcg_gen_ext32u_tl(source1, source1);
- tcg_gen_andi_tl(source2, source2, 0x1F);
- tcg_gen_shr_tl(source1, source1, source2);
- break;
-#endif
- case OPC_RISC_SRL:
- tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
- tcg_gen_shr_tl(source1, source1, source2);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_SRAW:
- /* first, trick to get it to act like working on 32 bits (get rid of
- upper 32, sign extend to fill space) */
- tcg_gen_ext32s_tl(source1, source1);
- tcg_gen_andi_tl(source2, source2, 0x1F);
- tcg_gen_sar_tl(source1, source1, source2);
- break;
-#endif
- case OPC_RISC_SRA:
- tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
- tcg_gen_sar_tl(source1, source1, source2);
- break;
- case OPC_RISC_OR:
- tcg_gen_or_tl(source1, source1, source2);
- break;
- case OPC_RISC_AND:
- tcg_gen_and_tl(source1, source1, source2);
- break;
- CASE_OP_32_64(OPC_RISC_MUL):
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_mul_tl(source1, source1, source2);
- break;
- case OPC_RISC_MULH:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_muls2_tl(source2, source1, source1, source2);
- break;
- case OPC_RISC_MULHSU:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- gen_mulhsu(source1, source1, source2);
- break;
- case OPC_RISC_MULHU:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_mulu2_tl(source2, source1, source1, source2);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_DIVW:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_ext32s_tl(source1, source1);
- tcg_gen_ext32s_tl(source2, source2);
- /* fall through to DIV */
-#endif
- case OPC_RISC_DIV:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- /* Handle by altering args to tcg_gen_div to produce req'd results:
- * For overflow: want source1 in source1 and 1 in source2
- * For div by zero: want -1 in source1 and 1 in source2 -> -1 result */
- cond1 = tcg_temp_new();
- cond2 = tcg_temp_new();
- zeroreg = tcg_const_tl(0);
- resultopt1 = tcg_temp_new();
-
- tcg_gen_movi_tl(resultopt1, (target_ulong)-1);
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)(~0L));
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
- ((target_ulong)1) << (TARGET_LONG_BITS - 1));
- tcg_gen_and_tl(cond1, cond1, cond2); /* cond1 = overflow */
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, 0); /* cond2 = div 0 */
- /* if div by zero, set source1 to -1, otherwise don't change */
- tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond2, zeroreg, source1,
- resultopt1);
- /* if overflow or div by zero, set source2 to 1, else don't change */
- tcg_gen_or_tl(cond1, cond1, cond2);
- tcg_gen_movi_tl(resultopt1, (target_ulong)1);
- tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
- resultopt1);
- tcg_gen_div_tl(source1, source1, source2);
-
- tcg_temp_free(cond1);
- tcg_temp_free(cond2);
- tcg_temp_free(zeroreg);
- tcg_temp_free(resultopt1);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_DIVUW:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_ext32u_tl(source1, source1);
- tcg_gen_ext32u_tl(source2, source2);
- /* fall through to DIVU */
-#endif
- case OPC_RISC_DIVU:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- cond1 = tcg_temp_new();
- zeroreg = tcg_const_tl(0);
- resultopt1 = tcg_temp_new();
-
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
- tcg_gen_movi_tl(resultopt1, (target_ulong)-1);
- tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond1, zeroreg, source1,
- resultopt1);
- tcg_gen_movi_tl(resultopt1, (target_ulong)1);
- tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
- resultopt1);
- tcg_gen_divu_tl(source1, source1, source2);
-
- tcg_temp_free(cond1);
- tcg_temp_free(zeroreg);
- tcg_temp_free(resultopt1);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_REMW:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_ext32s_tl(source1, source1);
- tcg_gen_ext32s_tl(source2, source2);
- /* fall through to REM */
-#endif
- case OPC_RISC_REM:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- cond1 = tcg_temp_new();
- cond2 = tcg_temp_new();
- zeroreg = tcg_const_tl(0);
- resultopt1 = tcg_temp_new();
-
- tcg_gen_movi_tl(resultopt1, 1L);
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)-1);
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
- (target_ulong)1 << (TARGET_LONG_BITS - 1));
- tcg_gen_and_tl(cond2, cond1, cond2); /* cond1 = overflow */
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0); /* cond2 = div 0 */
- /* if overflow or div by zero, set source2 to 1, else don't change */
- tcg_gen_or_tl(cond2, cond1, cond2);
- tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond2, zeroreg, source2,
- resultopt1);
- tcg_gen_rem_tl(resultopt1, source1, source2);
- /* if div by zero, just return the original dividend */
- tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond1, zeroreg, resultopt1,
- source1);
-
- tcg_temp_free(cond1);
- tcg_temp_free(cond2);
- tcg_temp_free(zeroreg);
- tcg_temp_free(resultopt1);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_REMUW:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- tcg_gen_ext32u_tl(source1, source1);
- tcg_gen_ext32u_tl(source2, source2);
- /* fall through to REMU */
-#endif
- case OPC_RISC_REMU:
- if (!has_ext(ctx, RVM)) {
- goto do_illegal;
- }
- cond1 = tcg_temp_new();
- zeroreg = tcg_const_tl(0);
- resultopt1 = tcg_temp_new();
-
- tcg_gen_movi_tl(resultopt1, (target_ulong)1);
- tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
- tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
- resultopt1);
- tcg_gen_remu_tl(resultopt1, source1, source2);
- /* if div by zero, just return the original dividend */
- tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond1, zeroreg, resultopt1,
- source1);
-
- tcg_temp_free(cond1);
- tcg_temp_free(zeroreg);
- tcg_temp_free(resultopt1);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- return;
- }
-
- if (opc & 0x8) { /* sign extend for W instructions */
- tcg_gen_ext32s_tl(source1, source1);
- }
-
- gen_set_gpr(rd, source1);
- tcg_temp_free(source1);
- tcg_temp_free(source2);
-}
-
-static void gen_arith_imm(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, target_long imm)
-{
- TCGv source1 = tcg_temp_new();
- int shift_len = TARGET_LONG_BITS;
- int shift_a;
-
- gen_get_gpr(source1, rs1);
-
- switch (opc) {
- case OPC_RISC_ADDI:
-#if defined(TARGET_RISCV64)
- case OPC_RISC_ADDIW:
-#endif
- tcg_gen_addi_tl(source1, source1, imm);
- break;
- case OPC_RISC_SLTI:
- tcg_gen_setcondi_tl(TCG_COND_LT, source1, source1, imm);
- break;
- case OPC_RISC_SLTIU:
- tcg_gen_setcondi_tl(TCG_COND_LTU, source1, source1, imm);
- break;
- case OPC_RISC_XORI:
- tcg_gen_xori_tl(source1, source1, imm);
- break;
- case OPC_RISC_ORI:
- tcg_gen_ori_tl(source1, source1, imm);
- break;
- case OPC_RISC_ANDI:
- tcg_gen_andi_tl(source1, source1, imm);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_SLLIW:
- shift_len = 32;
- /* FALLTHRU */
-#endif
- case OPC_RISC_SLLI:
- if (imm >= shift_len) {
- goto do_illegal;
- }
- tcg_gen_shli_tl(source1, source1, imm);
- break;
-#if defined(TARGET_RISCV64)
- case OPC_RISC_SHIFT_RIGHT_IW:
- shift_len = 32;
- /* FALLTHRU */
-#endif
- case OPC_RISC_SHIFT_RIGHT_I:
- /* differentiate on IMM */
- shift_a = imm & 0x400;
- imm &= 0x3ff;
- if (imm >= shift_len) {
- goto do_illegal;
- }
- if (imm != 0) {
- if (shift_a) {
- /* SRAI[W] */
- tcg_gen_sextract_tl(source1, source1, imm, shift_len - imm);
- } else {
- /* SRLI[W] */
- tcg_gen_extract_tl(source1, source1, imm, shift_len - imm);
- }
- /* No further sign-extension needed for W instructions. */
- opc &= ~0x8;
- }
- break;
- default:
- do_illegal:
- gen_exception_illegal(ctx);
- return;
- }
-
- if (opc & 0x8) { /* sign-extend for W instructions */
- tcg_gen_ext32s_tl(source1, source1);
- }
-
- gen_set_gpr(rd, source1);
- tcg_temp_free(source1);
+static void gen_div(TCGv ret, TCGv source1, TCGv source2)
+{
+ TCGv cond1, cond2, zeroreg, resultopt1;
+ /*
+ * Handle by altering args to tcg_gen_div to produce required results:
+ * For overflow: want source1 in source1 and 1 in source2
+ * For div by zero: want -1 in source1 and 1 in source2 -> -1 result
+ */
+ cond1 = tcg_temp_new();
+ cond2 = tcg_temp_new();
+ zeroreg = tcg_const_tl(0);
+ resultopt1 = tcg_temp_new();
+
+ tcg_gen_movi_tl(resultopt1, (target_ulong)-1);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)(~0L));
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
+ ((target_ulong)1) << (TARGET_LONG_BITS - 1));
+ tcg_gen_and_tl(cond1, cond1, cond2); /* cond1 = overflow */
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, 0); /* cond2 = div 0 */
+ /* if div by zero, set source1 to -1, otherwise don't change */
+ tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond2, zeroreg, source1,
+ resultopt1);
+ /* if overflow or div by zero, set source2 to 1, else don't change */
+ tcg_gen_or_tl(cond1, cond1, cond2);
+ tcg_gen_movi_tl(resultopt1, (target_ulong)1);
+ tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
+ resultopt1);
+ tcg_gen_div_tl(ret, source1, source2);
+
+ tcg_temp_free(cond1);
+ tcg_temp_free(cond2);
+ tcg_temp_free(zeroreg);
+ tcg_temp_free(resultopt1);
+}
+
+static void gen_divu(TCGv ret, TCGv source1, TCGv source2)
+{
+ TCGv cond1, zeroreg, resultopt1;
+ cond1 = tcg_temp_new();
+
+ zeroreg = tcg_const_tl(0);
+ resultopt1 = tcg_temp_new();
+
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
+ tcg_gen_movi_tl(resultopt1, (target_ulong)-1);
+ tcg_gen_movcond_tl(TCG_COND_EQ, source1, cond1, zeroreg, source1,
+ resultopt1);
+ tcg_gen_movi_tl(resultopt1, (target_ulong)1);
+ tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
+ resultopt1);
+ tcg_gen_divu_tl(ret, source1, source2);
+
+ tcg_temp_free(cond1);
+ tcg_temp_free(zeroreg);
+ tcg_temp_free(resultopt1);
+}
+
+static void gen_rem(TCGv ret, TCGv source1, TCGv source2)
+{
+ TCGv cond1, cond2, zeroreg, resultopt1;
+
+ cond1 = tcg_temp_new();
+ cond2 = tcg_temp_new();
+ zeroreg = tcg_const_tl(0);
+ resultopt1 = tcg_temp_new();
+
+ tcg_gen_movi_tl(resultopt1, (target_ulong)1);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond2, source2, (target_ulong)-1);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source1,
+ (target_ulong)1 << (TARGET_LONG_BITS - 1));
+ tcg_gen_and_tl(cond2, cond1, cond2); /* cond2 = overflow */
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0); /* cond1 = div 0 */
+ /* if overflow or div by zero, set source2 to 1, else don't change */
+ tcg_gen_or_tl(cond2, cond1, cond2);
+ tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond2, zeroreg, source2,
+ resultopt1);
+ tcg_gen_rem_tl(resultopt1, source1, source2);
+ /* if div by zero, just return the original dividend */
+ tcg_gen_movcond_tl(TCG_COND_EQ, ret, cond1, zeroreg, resultopt1,
+ source1);
+
+ tcg_temp_free(cond1);
+ tcg_temp_free(cond2);
+ tcg_temp_free(zeroreg);
+ tcg_temp_free(resultopt1);
+}
+
+static void gen_remu(TCGv ret, TCGv source1, TCGv source2)
+{
+ TCGv cond1, zeroreg, resultopt1;
+ cond1 = tcg_temp_new();
+ zeroreg = tcg_const_tl(0);
+ resultopt1 = tcg_temp_new();
+
+ tcg_gen_movi_tl(resultopt1, (target_ulong)1);
+ tcg_gen_setcondi_tl(TCG_COND_EQ, cond1, source2, 0);
+ tcg_gen_movcond_tl(TCG_COND_EQ, source2, cond1, zeroreg, source2,
+ resultopt1);
+ tcg_gen_remu_tl(resultopt1, source1, source2);
+ /* if div by zero, just return the original dividend */
+ tcg_gen_movcond_tl(TCG_COND_EQ, ret, cond1, zeroreg, resultopt1,
+ source1);
+
+ tcg_temp_free(cond1);
+ tcg_temp_free(zeroreg);
+ tcg_temp_free(resultopt1);
}
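
The movcond sequences in gen_div, gen_divu, gen_rem and gen_remu encode the RISC-V M-extension edge cases without branches. Written out as straight-line C for XLEN = 64 (a reference sketch, not part of the patch):

#include <stdint.h>

static int64_t riscv_div(int64_t a, int64_t b)
{
    if (b == 0) {
        return -1;                       /* division by zero */
    }
    if (a == INT64_MIN && b == -1) {
        return INT64_MIN;                /* signed overflow: quotient is the dividend */
    }
    return a / b;
}

static int64_t riscv_rem(int64_t a, int64_t b)
{
    if (b == 0) {
        return a;                        /* division by zero: remainder is the dividend */
    }
    if (a == INT64_MIN && b == -1) {
        return 0;                        /* signed overflow: remainder is 0 */
    }
    return a % b;
}

static uint64_t riscv_divu(uint64_t a, uint64_t b)
{
    return b == 0 ? UINT64_MAX : a / b;  /* division by zero yields all ones */
}

static uint64_t riscv_remu(uint64_t a, uint64_t b)
{
    return b == 0 ? a : a % b;           /* division by zero: remainder is the dividend */
}
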
static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
@@ -569,92 +316,8 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
ctx->base.is_jmp = DISAS_NORETURN;
}
-static void gen_jalr(DisasContext *ctx, uint32_t opc, int rd, int rs1,
- target_long imm)
-{
- /* no chaining with JALR */
- TCGLabel *misaligned = NULL;
- TCGv t0 = tcg_temp_new();
-
- switch (opc) {
- case OPC_RISC_JALR:
- gen_get_gpr(cpu_pc, rs1);
- tcg_gen_addi_tl(cpu_pc, cpu_pc, imm);
- tcg_gen_andi_tl(cpu_pc, cpu_pc, (target_ulong)-2);
-
- if (!has_ext(ctx, RVC)) {
- misaligned = gen_new_label();
- tcg_gen_andi_tl(t0, cpu_pc, 0x2);
- tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0x0, misaligned);
- }
-
- if (rd != 0) {
- tcg_gen_movi_tl(cpu_gpr[rd], ctx->pc_succ_insn);
- }
- tcg_gen_lookup_and_goto_ptr();
-
- if (misaligned) {
- gen_set_label(misaligned);
- gen_exception_inst_addr_mis(ctx);
- }
- ctx->base.is_jmp = DISAS_NORETURN;
- break;
-
- default:
- gen_exception_illegal(ctx);
- break;
- }
- tcg_temp_free(t0);
-}
-
-static void gen_branch(DisasContext *ctx, uint32_t opc, int rs1, int rs2,
- target_long bimm)
-{
- TCGLabel *l = gen_new_label();
- TCGv source1, source2;
- source1 = tcg_temp_new();
- source2 = tcg_temp_new();
- gen_get_gpr(source1, rs1);
- gen_get_gpr(source2, rs2);
-
- switch (opc) {
- case OPC_RISC_BEQ:
- tcg_gen_brcond_tl(TCG_COND_EQ, source1, source2, l);
- break;
- case OPC_RISC_BNE:
- tcg_gen_brcond_tl(TCG_COND_NE, source1, source2, l);
- break;
- case OPC_RISC_BLT:
- tcg_gen_brcond_tl(TCG_COND_LT, source1, source2, l);
- break;
- case OPC_RISC_BGE:
- tcg_gen_brcond_tl(TCG_COND_GE, source1, source2, l);
- break;
- case OPC_RISC_BLTU:
- tcg_gen_brcond_tl(TCG_COND_LTU, source1, source2, l);
- break;
- case OPC_RISC_BGEU:
- tcg_gen_brcond_tl(TCG_COND_GEU, source1, source2, l);
- break;
- default:
- gen_exception_illegal(ctx);
- return;
- }
- tcg_temp_free(source1);
- tcg_temp_free(source2);
-
- gen_goto_tb(ctx, 1, ctx->pc_succ_insn);
- gen_set_label(l); /* branch taken */
- if (!has_ext(ctx, RVC) && ((ctx->base.pc_next + bimm) & 0x3)) {
- /* misaligned */
- gen_exception_inst_addr_mis(ctx);
- } else {
- gen_goto_tb(ctx, 0, ctx->base.pc_next + bimm);
- }
- ctx->base.is_jmp = DISAS_NORETURN;
-}
-
-static void gen_load(DisasContext *ctx, uint32_t opc, int rd, int rs1,
+#ifdef TARGET_RISCV64
+static void gen_load_c(DisasContext *ctx, uint32_t opc, int rd, int rs1,
target_long imm)
{
TCGv t0 = tcg_temp_new();
@@ -674,7 +337,7 @@ static void gen_load(DisasContext *ctx, uint32_t opc, int rd, int rs1,
tcg_temp_free(t1);
}
-static void gen_store(DisasContext *ctx, uint32_t opc, int rs1, int rs2,
+static void gen_store_c(DisasContext *ctx, uint32_t opc, int rs1, int rs2,
target_long imm)
{
TCGv t0 = tcg_temp_new();
@@ -693,6 +356,7 @@ static void gen_store(DisasContext *ctx, uint32_t opc, int rs1, int rs2,
tcg_temp_free(t0);
tcg_temp_free(dat);
}
+#endif
#ifndef CONFIG_USER_ONLY
/* The states of mstatus_fs are:
@@ -719,6 +383,7 @@ static void mark_fs_dirty(DisasContext *ctx)
static inline void mark_fs_dirty(DisasContext *ctx) { }
#endif
+#if !defined(TARGET_RISCV64)
static void gen_fp_load(DisasContext *ctx, uint32_t opc, int rd,
int rs1, target_long imm)
{
@@ -793,143 +458,7 @@ static void gen_fp_store(DisasContext *ctx, uint32_t opc, int rs1,
tcg_temp_free(t0);
}
-
-static void gen_atomic(DisasContext *ctx, uint32_t opc,
- int rd, int rs1, int rs2)
-{
- TCGv src1, src2, dat;
- TCGLabel *l1, *l2;
- TCGMemOp mop;
- bool aq, rl;
-
- /* Extract the size of the atomic operation. */
- switch (extract32(opc, 12, 3)) {
- case 2: /* 32-bit */
- mop = MO_ALIGN | MO_TESL;
- break;
-#if defined(TARGET_RISCV64)
- case 3: /* 64-bit */
- mop = MO_ALIGN | MO_TEQ;
- break;
#endif
- default:
- gen_exception_illegal(ctx);
- return;
- }
- rl = extract32(opc, 25, 1);
- aq = extract32(opc, 26, 1);
-
- src1 = tcg_temp_new();
- src2 = tcg_temp_new();
-
- switch (MASK_OP_ATOMIC_NO_AQ_RL_SZ(opc)) {
- case OPC_RISC_LR:
- /* Put addr in load_res, data in load_val. */
- gen_get_gpr(src1, rs1);
- if (rl) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- }
- tcg_gen_qemu_ld_tl(load_val, src1, ctx->mem_idx, mop);
- if (aq) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
- }
- tcg_gen_mov_tl(load_res, src1);
- gen_set_gpr(rd, load_val);
- break;
-
- case OPC_RISC_SC:
- l1 = gen_new_label();
- l2 = gen_new_label();
- dat = tcg_temp_new();
-
- gen_get_gpr(src1, rs1);
- tcg_gen_brcond_tl(TCG_COND_NE, load_res, src1, l1);
-
- gen_get_gpr(src2, rs2);
- /* Note that the TCG atomic primitives are SC,
- so we can ignore AQ/RL along this path. */
- tcg_gen_atomic_cmpxchg_tl(src1, load_res, load_val, src2,
- ctx->mem_idx, mop);
- tcg_gen_setcond_tl(TCG_COND_NE, dat, src1, load_val);
- gen_set_gpr(rd, dat);
- tcg_gen_br(l2);
-
- gen_set_label(l1);
- /* Address comparion failure. However, we still need to
- provide the memory barrier implied by AQ/RL. */
- tcg_gen_mb(TCG_MO_ALL + aq * TCG_BAR_LDAQ + rl * TCG_BAR_STRL);
- tcg_gen_movi_tl(dat, 1);
- gen_set_gpr(rd, dat);
-
- gen_set_label(l2);
- tcg_temp_free(dat);
- break;
-
- case OPC_RISC_AMOSWAP:
- /* Note that the TCG atomic primitives are SC,
- so we can ignore AQ/RL along this path. */
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_xchg_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOADD:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_add_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOXOR:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_xor_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOAND:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_and_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOOR:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOMIN:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOMAX:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOMINU:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
- case OPC_RISC_AMOMAXU:
- gen_get_gpr(src1, rs1);
- gen_get_gpr(src2, rs2);
- tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop);
- gen_set_gpr(rd, src2);
- break;
-
- default:
- gen_exception_illegal(ctx);
- break;
- }
-
- tcg_temp_free(src1);
- tcg_temp_free(src2);
-}
static void gen_set_rm(DisasContext *ctx, int rm)
{
@@ -944,659 +473,6 @@ static void gen_set_rm(DisasContext *ctx, int rm)
tcg_temp_free_i32(t0);
}
-static void gen_fp_fmadd(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, int rs2, int rs3, int rm)
-{
- switch (opc) {
- case OPC_RISC_FMADD_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmadd_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- case OPC_RISC_FMADD_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmadd_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- break;
- }
-}
-
-static void gen_fp_fmsub(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, int rs2, int rs3, int rm)
-{
- switch (opc) {
- case OPC_RISC_FMSUB_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmsub_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- case OPC_RISC_FMSUB_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmsub_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- break;
- }
-}
-
-static void gen_fp_fnmsub(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, int rs2, int rs3, int rm)
-{
- switch (opc) {
- case OPC_RISC_FNMSUB_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fnmsub_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- case OPC_RISC_FNMSUB_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fnmsub_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- break;
- }
-}
-
-static void gen_fp_fnmadd(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, int rs2, int rs3, int rm)
-{
- switch (opc) {
- case OPC_RISC_FNMADD_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fnmadd_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- case OPC_RISC_FNMADD_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fnmadd_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1],
- cpu_fpr[rs2], cpu_fpr[rs3]);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- break;
- }
-}
-
-static void gen_fp_arith(DisasContext *ctx, uint32_t opc, int rd,
- int rs1, int rs2, int rm)
-{
- TCGv t0 = NULL;
- bool fp_output = true;
-
- if (ctx->mstatus_fs == 0) {
- goto do_illegal;
- }
-
- switch (opc) {
- case OPC_RISC_FADD_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fadd_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FSUB_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fsub_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FMUL_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmul_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FDIV_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fdiv_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FSQRT_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fsqrt_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1]);
- break;
- case OPC_RISC_FSGNJ_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- gen_fsgnj(ctx, rd, rs1, rs2, rm, INT32_MIN);
- break;
-
- case OPC_RISC_FMIN_S:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- /* also handles: OPC_RISC_FMAX_S */
- switch (rm) {
- case 0x0:
- gen_helper_fmin_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 0x1:
- gen_helper_fmax_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- default:
- goto do_illegal;
- }
- break;
-
- case OPC_RISC_FEQ_S:
- /* also handles: OPC_RISC_FLT_S, OPC_RISC_FLE_S */
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- switch (rm) {
- case 0x0:
- gen_helper_fle_s(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 0x1:
- gen_helper_flt_s(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 0x2:
- gen_helper_feq_s(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- default:
- goto do_illegal;
- }
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- fp_output = false;
- break;
-
- case OPC_RISC_FCVT_W_S:
- /* also OPC_RISC_FCVT_WU_S, OPC_RISC_FCVT_L_S, OPC_RISC_FCVT_LU_S */
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- switch (rs2) {
- case 0: /* FCVT_W_S */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_w_s(t0, cpu_env, cpu_fpr[rs1]);
- break;
- case 1: /* FCVT_WU_S */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_wu_s(t0, cpu_env, cpu_fpr[rs1]);
- break;
-#if defined(TARGET_RISCV64)
- case 2: /* FCVT_L_S */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_l_s(t0, cpu_env, cpu_fpr[rs1]);
- break;
- case 3: /* FCVT_LU_S */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_lu_s(t0, cpu_env, cpu_fpr[rs1]);
- break;
-#endif
- default:
- goto do_illegal;
- }
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- fp_output = false;
- break;
-
- case OPC_RISC_FCVT_S_W:
- /* also OPC_RISC_FCVT_S_WU, OPC_RISC_FCVT_S_L, OPC_RISC_FCVT_S_LU */
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- gen_get_gpr(t0, rs1);
- switch (rs2) {
- case 0: /* FCVT_S_W */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_s_w(cpu_fpr[rd], cpu_env, t0);
- break;
- case 1: /* FCVT_S_WU */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_s_wu(cpu_fpr[rd], cpu_env, t0);
- break;
-#if defined(TARGET_RISCV64)
- case 2: /* FCVT_S_L */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_s_l(cpu_fpr[rd], cpu_env, t0);
- break;
- case 3: /* FCVT_S_LU */
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_s_lu(cpu_fpr[rd], cpu_env, t0);
- break;
-#endif
- default:
- goto do_illegal;
- }
- tcg_temp_free(t0);
- break;
-
- case OPC_RISC_FMV_X_S:
- /* also OPC_RISC_FCLASS_S */
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- switch (rm) {
- case 0: /* FMV */
-#if defined(TARGET_RISCV64)
- tcg_gen_ext32s_tl(t0, cpu_fpr[rs1]);
-#else
- tcg_gen_extrl_i64_i32(t0, cpu_fpr[rs1]);
-#endif
- break;
- case 1:
- gen_helper_fclass_s(t0, cpu_fpr[rs1]);
- break;
- default:
- goto do_illegal;
- }
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- fp_output = false;
- break;
-
- case OPC_RISC_FMV_S_X:
- if (!has_ext(ctx, RVF)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- gen_get_gpr(t0, rs1);
-#if defined(TARGET_RISCV64)
- tcg_gen_mov_i64(cpu_fpr[rd], t0);
-#else
- tcg_gen_extu_i32_i64(cpu_fpr[rd], t0);
-#endif
- tcg_temp_free(t0);
- break;
-
- /* double */
- case OPC_RISC_FADD_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fadd_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FSUB_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fsub_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FMUL_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fmul_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FDIV_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fdiv_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case OPC_RISC_FSQRT_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- gen_set_rm(ctx, rm);
- gen_helper_fsqrt_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1]);
- break;
- case OPC_RISC_FSGNJ_D:
- gen_fsgnj(ctx, rd, rs1, rs2, rm, INT64_MIN);
- break;
-
- case OPC_RISC_FMIN_D:
- /* also OPC_RISC_FMAX_D */
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- switch (rm) {
- case 0:
- gen_helper_fmin_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 1:
- gen_helper_fmax_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- default:
- goto do_illegal;
- }
- break;
-
- case OPC_RISC_FCVT_S_D:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- switch (rs2) {
- case 1:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_s_d(cpu_fpr[rd], cpu_env, cpu_fpr[rs1]);
- break;
- default:
- goto do_illegal;
- }
- break;
-
- case OPC_RISC_FCVT_D_S:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- switch (rs2) {
- case 0:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_d_s(cpu_fpr[rd], cpu_env, cpu_fpr[rs1]);
- break;
- default:
- goto do_illegal;
- }
- break;
-
- case OPC_RISC_FEQ_D:
- /* also OPC_RISC_FLT_D, OPC_RISC_FLE_D */
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- switch (rm) {
- case 0:
- gen_helper_fle_d(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 1:
- gen_helper_flt_d(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- case 2:
- gen_helper_feq_d(t0, cpu_env, cpu_fpr[rs1], cpu_fpr[rs2]);
- break;
- default:
- goto do_illegal;
- }
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- fp_output = false;
- break;
-
- case OPC_RISC_FCVT_W_D:
- /* also OPC_RISC_FCVT_WU_D, OPC_RISC_FCVT_L_D, OPC_RISC_FCVT_LU_D */
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- switch (rs2) {
- case 0:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_w_d(t0, cpu_env, cpu_fpr[rs1]);
- break;
- case 1:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_wu_d(t0, cpu_env, cpu_fpr[rs1]);
- break;
-#if defined(TARGET_RISCV64)
- case 2:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_l_d(t0, cpu_env, cpu_fpr[rs1]);
- break;
- case 3:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_lu_d(t0, cpu_env, cpu_fpr[rs1]);
- break;
-#endif
- default:
- goto do_illegal;
- }
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- fp_output = false;
- break;
-
- case OPC_RISC_FCVT_D_W:
- /* also OPC_RISC_FCVT_D_WU, OPC_RISC_FCVT_D_L, OPC_RISC_FCVT_D_LU */
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- gen_get_gpr(t0, rs1);
- switch (rs2) {
- case 0:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_d_w(cpu_fpr[rd], cpu_env, t0);
- break;
- case 1:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_d_wu(cpu_fpr[rd], cpu_env, t0);
- break;
-#if defined(TARGET_RISCV64)
- case 2:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_d_l(cpu_fpr[rd], cpu_env, t0);
- break;
- case 3:
- gen_set_rm(ctx, rm);
- gen_helper_fcvt_d_lu(cpu_fpr[rd], cpu_env, t0);
- break;
-#endif
- default:
- goto do_illegal;
- }
- tcg_temp_free(t0);
- break;
-
- case OPC_RISC_FMV_X_D:
- /* also OPC_RISC_FCLASS_D */
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- switch (rm) {
-#if defined(TARGET_RISCV64)
- case 0: /* FMV */
- gen_set_gpr(rd, cpu_fpr[rs1]);
- break;
-#endif
- case 1:
- t0 = tcg_temp_new();
- gen_helper_fclass_d(t0, cpu_fpr[rs1]);
- gen_set_gpr(rd, t0);
- tcg_temp_free(t0);
- break;
- default:
- goto do_illegal;
- }
- fp_output = false;
- break;
-
-#if defined(TARGET_RISCV64)
- case OPC_RISC_FMV_D_X:
- if (!has_ext(ctx, RVD)) {
- goto do_illegal;
- }
- t0 = tcg_temp_new();
- gen_get_gpr(t0, rs1);
- tcg_gen_mov_tl(cpu_fpr[rd], t0);
- tcg_temp_free(t0);
- break;
-#endif
-
- default:
- do_illegal:
- if (t0) {
- tcg_temp_free(t0);
- }
- gen_exception_illegal(ctx);
- return;
- }
-
- if (fp_output) {
- mark_fs_dirty(ctx);
- }
-}
-
-static void gen_system(DisasContext *ctx, uint32_t opc, int rd, int rs1,
- int csr)
-{
- TCGv source1, csr_store, dest, rs1_pass, imm_rs1;
- source1 = tcg_temp_new();
- csr_store = tcg_temp_new();
- dest = tcg_temp_new();
- rs1_pass = tcg_temp_new();
- imm_rs1 = tcg_temp_new();
- gen_get_gpr(source1, rs1);
- tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
- tcg_gen_movi_tl(rs1_pass, rs1);
- tcg_gen_movi_tl(csr_store, csr); /* copy into temp reg to feed to helper */
-
-#ifndef CONFIG_USER_ONLY
- /* Extract funct7 value and check whether it matches SFENCE.VMA */
- if ((opc == OPC_RISC_ECALL) && ((csr >> 5) == 9)) {
- if (ctx->priv_ver == PRIV_VERSION_1_10_0) {
- /* sfence.vma */
- /* TODO: handle ASID specific fences */
- gen_helper_tlb_flush(cpu_env);
- return;
- } else {
- gen_exception_illegal(ctx);
- }
- }
-#endif
-
- switch (opc) {
- case OPC_RISC_ECALL:
- switch (csr) {
- case 0x0: /* ECALL */
- /* always generates U-level ECALL, fixed in do_interrupt handler */
- generate_exception(ctx, RISCV_EXCP_U_ECALL);
- tcg_gen_exit_tb(NULL, 0); /* no chaining */
- ctx->base.is_jmp = DISAS_NORETURN;
- break;
- case 0x1: /* EBREAK */
- generate_exception(ctx, RISCV_EXCP_BREAKPOINT);
- tcg_gen_exit_tb(NULL, 0); /* no chaining */
- ctx->base.is_jmp = DISAS_NORETURN;
- break;
-#ifndef CONFIG_USER_ONLY
- case 0x002: /* URET */
- gen_exception_illegal(ctx);
- break;
- case 0x102: /* SRET */
- if (has_ext(ctx, RVS)) {
- gen_helper_sret(cpu_pc, cpu_env, cpu_pc);
- tcg_gen_exit_tb(NULL, 0); /* no chaining */
- ctx->base.is_jmp = DISAS_NORETURN;
- } else {
- gen_exception_illegal(ctx);
- }
- break;
- case 0x202: /* HRET */
- gen_exception_illegal(ctx);
- break;
- case 0x302: /* MRET */
- gen_helper_mret(cpu_pc, cpu_env, cpu_pc);
- tcg_gen_exit_tb(NULL, 0); /* no chaining */
- ctx->base.is_jmp = DISAS_NORETURN;
- break;
- case 0x7b2: /* DRET */
- gen_exception_illegal(ctx);
- break;
- case 0x105: /* WFI */
- tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
- gen_helper_wfi(cpu_env);
- break;
- case 0x104: /* SFENCE.VM */
- if (ctx->priv_ver <= PRIV_VERSION_1_09_1) {
- gen_helper_tlb_flush(cpu_env);
- } else {
- gen_exception_illegal(ctx);
- }
- break;
-#endif
- default:
- gen_exception_illegal(ctx);
- break;
- }
- break;
- default:
- tcg_gen_movi_tl(imm_rs1, rs1);
- gen_io_start();
- switch (opc) {
- case OPC_RISC_CSRRW:
- gen_helper_csrrw(dest, cpu_env, source1, csr_store);
- break;
- case OPC_RISC_CSRRS:
- gen_helper_csrrs(dest, cpu_env, source1, csr_store, rs1_pass);
- break;
- case OPC_RISC_CSRRC:
- gen_helper_csrrc(dest, cpu_env, source1, csr_store, rs1_pass);
- break;
- case OPC_RISC_CSRRWI:
- gen_helper_csrrw(dest, cpu_env, imm_rs1, csr_store);
- break;
- case OPC_RISC_CSRRSI:
- gen_helper_csrrs(dest, cpu_env, imm_rs1, csr_store, rs1_pass);
- break;
- case OPC_RISC_CSRRCI:
- gen_helper_csrrc(dest, cpu_env, imm_rs1, csr_store, rs1_pass);
- break;
- default:
- gen_exception_illegal(ctx);
- return;
- }
- gen_io_end();
- gen_set_gpr(rd, dest);
- /* end tb since we may be changing priv modes, to get mmu_index right */
- tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
- tcg_gen_exit_tb(NULL, 0); /* no chaining */
- ctx->base.is_jmp = DISAS_NORETURN;
- break;
- }
- tcg_temp_free(source1);
- tcg_temp_free(csr_store);
- tcg_temp_free(dest);
- tcg_temp_free(rs1_pass);
- tcg_temp_free(imm_rs1);
-}
-
static void decode_RV32_64C0(DisasContext *ctx)
{
uint8_t funct3 = extract32(ctx->opcode, 13, 3);
@@ -1604,31 +480,10 @@ static void decode_RV32_64C0(DisasContext *ctx)
uint8_t rs1s = GET_C_RS1S(ctx->opcode);
switch (funct3) {
- case 0:
- /* illegal */
- if (ctx->opcode == 0) {
- gen_exception_illegal(ctx);
- } else {
- /* C.ADDI4SPN -> addi rd', x2, zimm[9:2]*/
- gen_arith_imm(ctx, OPC_RISC_ADDI, rd_rs2, 2,
- GET_C_ADDI4SPN_IMM(ctx->opcode));
- }
- break;
- case 1:
- /* C.FLD -> fld rd', offset[7:3](rs1')*/
- gen_fp_load(ctx, OPC_RISC_FLD, rd_rs2, rs1s,
- GET_C_LD_IMM(ctx->opcode));
- /* C.LQ(RV128) */
- break;
- case 2:
- /* C.LW -> lw rd', offset[6:2](rs1') */
- gen_load(ctx, OPC_RISC_LW, rd_rs2, rs1s,
- GET_C_LW_IMM(ctx->opcode));
- break;
case 3:
#if defined(TARGET_RISCV64)
/* C.LD(RV64/128) -> ld rd', offset[7:3](rs1')*/
- gen_load(ctx, OPC_RISC_LD, rd_rs2, rs1s,
+ gen_load_c(ctx, OPC_RISC_LD, rd_rs2, rs1s,
GET_C_LD_IMM(ctx->opcode));
#else
/* C.FLW (RV32) -> flw rd', offset[6:2](rs1')*/
@@ -1636,25 +491,10 @@ static void decode_RV32_64C0(DisasContext *ctx)
GET_C_LW_IMM(ctx->opcode));
#endif
break;
- case 4:
- /* reserved */
- gen_exception_illegal(ctx);
- break;
- case 5:
- /* C.FSD(RV32/64) -> fsd rs2', offset[7:3](rs1') */
- gen_fp_store(ctx, OPC_RISC_FSD, rs1s, rd_rs2,
- GET_C_LD_IMM(ctx->opcode));
- /* C.SQ (RV128) */
- break;
- case 6:
- /* C.SW -> sw rs2', offset[6:2](rs1')*/
- gen_store(ctx, OPC_RISC_SW, rs1s, rd_rs2,
- GET_C_LW_IMM(ctx->opcode));
- break;
case 7:
#if defined(TARGET_RISCV64)
/* C.SD (RV64/128) -> sd rs2', offset[7:3](rs1')*/
- gen_store(ctx, OPC_RISC_SD, rs1s, rd_rs2,
+ gen_store_c(ctx, OPC_RISC_SD, rs1s, rd_rs2,
GET_C_LD_IMM(ctx->opcode));
#else
/* C.FSW (RV32) -> fsw rs2', offset[6:2](rs1')*/
@@ -1665,340 +505,152 @@ static void decode_RV32_64C0(DisasContext *ctx)
}
}
-static void decode_RV32_64C1(DisasContext *ctx)
+static void decode_RV32_64C(DisasContext *ctx)
{
- uint8_t funct3 = extract32(ctx->opcode, 13, 3);
- uint8_t rd_rs1 = GET_C_RS1(ctx->opcode);
- uint8_t rs1s, rs2s;
- uint8_t funct2;
+ uint8_t op = extract32(ctx->opcode, 0, 2);
- switch (funct3) {
+ switch (op) {
case 0:
- /* C.ADDI -> addi rd, rd, nzimm[5:0] */
- gen_arith_imm(ctx, OPC_RISC_ADDI, rd_rs1, rd_rs1,
- GET_C_IMM(ctx->opcode));
- break;
- case 1:
-#if defined(TARGET_RISCV64)
- /* C.ADDIW (RV64/128) -> addiw rd, rd, imm[5:0]*/
- gen_arith_imm(ctx, OPC_RISC_ADDIW, rd_rs1, rd_rs1,
- GET_C_IMM(ctx->opcode));
-#else
- /* C.JAL(RV32) -> jal x1, offset[11:1] */
- gen_jal(ctx, 1, GET_C_J_IMM(ctx->opcode));
-#endif
- break;
- case 2:
- /* C.LI -> addi rd, x0, imm[5:0]*/
- gen_arith_imm(ctx, OPC_RISC_ADDI, rd_rs1, 0, GET_C_IMM(ctx->opcode));
- break;
- case 3:
- if (rd_rs1 == 2) {
- /* C.ADDI16SP -> addi x2, x2, nzimm[9:4]*/
- gen_arith_imm(ctx, OPC_RISC_ADDI, 2, 2,
- GET_C_ADDI16SP_IMM(ctx->opcode));
- } else if (rd_rs1 != 0) {
- /* C.LUI (rs1/rd =/= {0,2}) -> lui rd, nzimm[17:12]*/
- tcg_gen_movi_tl(cpu_gpr[rd_rs1],
- GET_C_IMM(ctx->opcode) << 12);
- }
- break;
- case 4:
- funct2 = extract32(ctx->opcode, 10, 2);
- rs1s = GET_C_RS1S(ctx->opcode);
- switch (funct2) {
- case 0: /* C.SRLI(RV32) -> srli rd', rd', shamt[5:0] */
- gen_arith_imm(ctx, OPC_RISC_SHIFT_RIGHT_I, rs1s, rs1s,
- GET_C_ZIMM(ctx->opcode));
- /* C.SRLI64(RV128) */
- break;
- case 1:
- /* C.SRAI -> srai rd', rd', shamt[5:0]*/
- gen_arith_imm(ctx, OPC_RISC_SHIFT_RIGHT_I, rs1s, rs1s,
- GET_C_ZIMM(ctx->opcode) | 0x400);
- /* C.SRAI64(RV128) */
- break;
- case 2:
- /* C.ANDI -> andi rd', rd', imm[5:0]*/
- gen_arith_imm(ctx, OPC_RISC_ANDI, rs1s, rs1s,
- GET_C_IMM(ctx->opcode));
- break;
- case 3:
- funct2 = extract32(ctx->opcode, 5, 2);
- rs2s = GET_C_RS2S(ctx->opcode);
- switch (funct2) {
- case 0:
- /* C.SUB -> sub rd', rd', rs2' */
- if (extract32(ctx->opcode, 12, 1) == 0) {
- gen_arith(ctx, OPC_RISC_SUB, rs1s, rs1s, rs2s);
- }
-#if defined(TARGET_RISCV64)
- else {
- gen_arith(ctx, OPC_RISC_SUBW, rs1s, rs1s, rs2s);
- }
-#endif
- break;
- case 1:
- /* C.XOR -> xor rs1', rs1', rs2' */
- if (extract32(ctx->opcode, 12, 1) == 0) {
- gen_arith(ctx, OPC_RISC_XOR, rs1s, rs1s, rs2s);
- }
-#if defined(TARGET_RISCV64)
- else {
- /* C.ADDW (RV64/128) */
- gen_arith(ctx, OPC_RISC_ADDW, rs1s, rs1s, rs2s);
- }
-#endif
- break;
- case 2:
- /* C.OR -> or rs1', rs1', rs2' */
- gen_arith(ctx, OPC_RISC_OR, rs1s, rs1s, rs2s);
- break;
- case 3:
- /* C.AND -> and rs1', rs1', rs2' */
- gen_arith(ctx, OPC_RISC_AND, rs1s, rs1s, rs2s);
- break;
- }
- break;
- }
- break;
- case 5:
- /* C.J -> jal x0, offset[11:1]*/
- gen_jal(ctx, 0, GET_C_J_IMM(ctx->opcode));
- break;
- case 6:
- /* C.BEQZ -> beq rs1', x0, offset[8:1]*/
- rs1s = GET_C_RS1S(ctx->opcode);
- gen_branch(ctx, OPC_RISC_BEQ, rs1s, 0, GET_C_B_IMM(ctx->opcode));
- break;
- case 7:
- /* C.BNEZ -> bne rs1', x0, offset[8:1]*/
- rs1s = GET_C_RS1S(ctx->opcode);
- gen_branch(ctx, OPC_RISC_BNE, rs1s, 0, GET_C_B_IMM(ctx->opcode));
+ decode_RV32_64C0(ctx);
break;
}
}
-static void decode_RV32_64C2(DisasContext *ctx)
+#define EX_SH(amount) \
+ static int ex_shift_##amount(int imm) \
+ { \
+ return imm << amount; \
+ }
+EX_SH(1)
+EX_SH(2)
+EX_SH(3)
+EX_SH(4)
+EX_SH(12)
+
+#define REQUIRE_EXT(ctx, ext) do { \
+ if (!has_ext(ctx, ext)) { \
+ return false; \
+ } \
+} while (0)
+
+static int ex_rvc_register(int reg)
+{
+ return 8 + reg;
+}
+
+bool decode_insn32(DisasContext *ctx, uint32_t insn);
+/* Include the auto-generated decoder for 32 bit insn */
+#include "decode_insn32.inc.c"
+
+static bool gen_arith_imm(DisasContext *ctx, arg_i *a,
+ void(*func)(TCGv, TCGv, TCGv))
{
- uint8_t rd, rs2;
- uint8_t funct3 = extract32(ctx->opcode, 13, 3);
+ TCGv source1, source2;
+ source1 = tcg_temp_new();
+ source2 = tcg_temp_new();
+ gen_get_gpr(source1, a->rs1);
+ tcg_gen_movi_tl(source2, a->imm);
- rd = GET_RD(ctx->opcode);
+ (*func)(source1, source1, source2);
- switch (funct3) {
- case 0: /* C.SLLI -> slli rd, rd, shamt[5:0]
- C.SLLI64 -> */
- gen_arith_imm(ctx, OPC_RISC_SLLI, rd, rd, GET_C_ZIMM(ctx->opcode));
- break;
- case 1: /* C.FLDSP(RV32/64DC) -> fld rd, offset[8:3](x2) */
- gen_fp_load(ctx, OPC_RISC_FLD, rd, 2, GET_C_LDSP_IMM(ctx->opcode));
- break;
- case 2: /* C.LWSP -> lw rd, offset[7:2](x2) */
- gen_load(ctx, OPC_RISC_LW, rd, 2, GET_C_LWSP_IMM(ctx->opcode));
- break;
- case 3:
-#if defined(TARGET_RISCV64)
- /* C.LDSP(RVC64) -> ld rd, offset[8:3](x2) */
- gen_load(ctx, OPC_RISC_LD, rd, 2, GET_C_LDSP_IMM(ctx->opcode));
-#else
- /* C.FLWSP(RV32FC) -> flw rd, offset[7:2](x2) */
- gen_fp_load(ctx, OPC_RISC_FLW, rd, 2, GET_C_LWSP_IMM(ctx->opcode));
-#endif
- break;
- case 4:
- rs2 = GET_C_RS2(ctx->opcode);
-
- if (extract32(ctx->opcode, 12, 1) == 0) {
- if (rs2 == 0) {
- /* C.JR -> jalr x0, rs1, 0*/
- gen_jalr(ctx, OPC_RISC_JALR, 0, rd, 0);
- } else {
- /* C.MV -> add rd, x0, rs2 */
- gen_arith(ctx, OPC_RISC_ADD, rd, 0, rs2);
- }
- } else {
- if (rd == 0) {
- /* C.EBREAK -> ebreak*/
- gen_system(ctx, OPC_RISC_ECALL, 0, 0, 0x1);
- } else {
- if (rs2 == 0) {
- /* C.JALR -> jalr x1, rs1, 0*/
- gen_jalr(ctx, OPC_RISC_JALR, 1, rd, 0);
- } else {
- /* C.ADD -> add rd, rd, rs2 */
- gen_arith(ctx, OPC_RISC_ADD, rd, rd, rs2);
- }
- }
- }
- break;
- case 5:
- /* C.FSDSP -> fsd rs2, offset[8:3](x2)*/
- gen_fp_store(ctx, OPC_RISC_FSD, 2, GET_C_RS2(ctx->opcode),
- GET_C_SDSP_IMM(ctx->opcode));
- /* C.SQSP */
- break;
- case 6: /* C.SWSP -> sw rs2, offset[7:2](x2)*/
- gen_store(ctx, OPC_RISC_SW, 2, GET_C_RS2(ctx->opcode),
- GET_C_SWSP_IMM(ctx->opcode));
- break;
- case 7:
-#if defined(TARGET_RISCV64)
- /* C.SDSP(Rv64/128) -> sd rs2, offset[8:3](x2)*/
- gen_store(ctx, OPC_RISC_SD, 2, GET_C_RS2(ctx->opcode),
- GET_C_SDSP_IMM(ctx->opcode));
-#else
- /* C.FSWSP(RV32) -> fsw rs2, offset[7:2](x2) */
- gen_fp_store(ctx, OPC_RISC_FSW, 2, GET_C_RS2(ctx->opcode),
- GET_C_SWSP_IMM(ctx->opcode));
-#endif
- break;
- }
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
}
-static void decode_RV32_64C(DisasContext *ctx)
+#ifdef TARGET_RISCV64
+static void gen_addw(TCGv ret, TCGv arg1, TCGv arg2)
{
- uint8_t op = extract32(ctx->opcode, 0, 2);
+ tcg_gen_add_tl(ret, arg1, arg2);
+ tcg_gen_ext32s_tl(ret, ret);
+}
- switch (op) {
- case 0:
- decode_RV32_64C0(ctx);
- break;
- case 1:
- decode_RV32_64C1(ctx);
- break;
- case 2:
- decode_RV32_64C2(ctx);
- break;
- }
+static void gen_subw(TCGv ret, TCGv arg1, TCGv arg2)
+{
+ tcg_gen_sub_tl(ret, arg1, arg2);
+ tcg_gen_ext32s_tl(ret, ret);
}
-static void decode_RV32_64G(DisasContext *ctx)
+static void gen_mulw(TCGv ret, TCGv arg1, TCGv arg2)
{
- int rs1;
- int rs2;
- int rd;
- uint32_t op;
- target_long imm;
-
- /* We do not do misaligned address check here: the address should never be
- * misaligned at this point. Instructions that set PC must do the check,
- * since epc must be the address of the instruction that caused us to
- * perform the misaligned instruction fetch */
-
- op = MASK_OP_MAJOR(ctx->opcode);
- rs1 = GET_RS1(ctx->opcode);
- rs2 = GET_RS2(ctx->opcode);
- rd = GET_RD(ctx->opcode);
- imm = GET_IMM(ctx->opcode);
+ tcg_gen_mul_tl(ret, arg1, arg2);
+ tcg_gen_ext32s_tl(ret, ret);
+}
+
+static bool gen_arith_div_w(DisasContext *ctx, arg_r *a,
+ void(*func)(TCGv, TCGv, TCGv))
+{
+ TCGv source1, source2;
+ source1 = tcg_temp_new();
+ source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+ tcg_gen_ext32s_tl(source1, source1);
+ tcg_gen_ext32s_tl(source2, source2);
+
+ (*func)(source1, source1, source2);
+
+ tcg_gen_ext32s_tl(source1, source1);
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
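
gen_arith_div_w gives the RV64 *W division variants RV32 semantics: both sources are sign-extended from bit 31 before the XLEN-wide helper runs, and the 32-bit result is sign-extended again before writeback. For DIVW this is equivalent to (a sketch assuming XLEN = 64; not part of the patch):

#include <stdint.h>

static int64_t riscv_divw(int64_t a, int64_t b)
{
    int32_t lo_a = (int32_t)a;       /* ext32s of source1 */
    int32_t lo_b = (int32_t)b;       /* ext32s of source2 */
    int32_t q;

    if (lo_b == 0) {
        q = -1;                      /* division by zero */
    } else if (lo_a == INT32_MIN && lo_b == -1) {
        q = INT32_MIN;               /* 32-bit signed overflow */
    } else {
        q = lo_a / lo_b;
    }
    return (int64_t)q;               /* final ext32s before writeback */
}
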
- switch (op) {
- case OPC_RISC_LUI:
- if (rd == 0) {
- break; /* NOP */
- }
- tcg_gen_movi_tl(cpu_gpr[rd], sextract64(ctx->opcode, 12, 20) << 12);
- break;
- case OPC_RISC_AUIPC:
- if (rd == 0) {
- break; /* NOP */
- }
- tcg_gen_movi_tl(cpu_gpr[rd], (sextract64(ctx->opcode, 12, 20) << 12) +
- ctx->base.pc_next);
- break;
- case OPC_RISC_JAL:
- imm = GET_JAL_IMM(ctx->opcode);
- gen_jal(ctx, rd, imm);
- break;
- case OPC_RISC_JALR:
- gen_jalr(ctx, MASK_OP_JALR(ctx->opcode), rd, rs1, imm);
- break;
- case OPC_RISC_BRANCH:
- gen_branch(ctx, MASK_OP_BRANCH(ctx->opcode), rs1, rs2,
- GET_B_IMM(ctx->opcode));
- break;
- case OPC_RISC_LOAD:
- gen_load(ctx, MASK_OP_LOAD(ctx->opcode), rd, rs1, imm);
- break;
- case OPC_RISC_STORE:
- gen_store(ctx, MASK_OP_STORE(ctx->opcode), rs1, rs2,
- GET_STORE_IMM(ctx->opcode));
- break;
- case OPC_RISC_ARITH_IMM:
-#if defined(TARGET_RISCV64)
- case OPC_RISC_ARITH_IMM_W:
-#endif
- if (rd == 0) {
- break; /* NOP */
- }
- gen_arith_imm(ctx, MASK_OP_ARITH_IMM(ctx->opcode), rd, rs1, imm);
- break;
- case OPC_RISC_ARITH:
-#if defined(TARGET_RISCV64)
- case OPC_RISC_ARITH_W:
#endif
- if (rd == 0) {
- break; /* NOP */
- }
- gen_arith(ctx, MASK_OP_ARITH(ctx->opcode), rd, rs1, rs2);
- break;
- case OPC_RISC_FP_LOAD:
- gen_fp_load(ctx, MASK_OP_FP_LOAD(ctx->opcode), rd, rs1, imm);
- break;
- case OPC_RISC_FP_STORE:
- gen_fp_store(ctx, MASK_OP_FP_STORE(ctx->opcode), rs1, rs2,
- GET_STORE_IMM(ctx->opcode));
- break;
- case OPC_RISC_ATOMIC:
- if (!has_ext(ctx, RVA)) {
- goto do_illegal;
- }
- gen_atomic(ctx, MASK_OP_ATOMIC(ctx->opcode), rd, rs1, rs2);
- break;
- case OPC_RISC_FMADD:
- gen_fp_fmadd(ctx, MASK_OP_FP_FMADD(ctx->opcode), rd, rs1, rs2,
- GET_RS3(ctx->opcode), GET_RM(ctx->opcode));
- break;
- case OPC_RISC_FMSUB:
- gen_fp_fmsub(ctx, MASK_OP_FP_FMSUB(ctx->opcode), rd, rs1, rs2,
- GET_RS3(ctx->opcode), GET_RM(ctx->opcode));
- break;
- case OPC_RISC_FNMSUB:
- gen_fp_fnmsub(ctx, MASK_OP_FP_FNMSUB(ctx->opcode), rd, rs1, rs2,
- GET_RS3(ctx->opcode), GET_RM(ctx->opcode));
- break;
- case OPC_RISC_FNMADD:
- gen_fp_fnmadd(ctx, MASK_OP_FP_FNMADD(ctx->opcode), rd, rs1, rs2,
- GET_RS3(ctx->opcode), GET_RM(ctx->opcode));
- break;
- case OPC_RISC_FP_ARITH:
- gen_fp_arith(ctx, MASK_OP_FP_ARITH(ctx->opcode), rd, rs1, rs2,
- GET_RM(ctx->opcode));
- break;
- case OPC_RISC_FENCE:
- if (ctx->opcode & 0x1000) {
- /* FENCE_I is a no-op in QEMU,
- * however we need to end the translation block */
- tcg_gen_movi_tl(cpu_pc, ctx->pc_succ_insn);
- tcg_gen_exit_tb(NULL, 0);
- ctx->base.is_jmp = DISAS_NORETURN;
- } else {
- /* FENCE is a full memory barrier. */
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
- }
- break;
- case OPC_RISC_SYSTEM:
- gen_system(ctx, MASK_OP_SYSTEM(ctx->opcode), rd, rs1,
- (ctx->opcode & 0xFFF00000) >> 20);
- break;
- do_illegal:
- default:
- gen_exception_illegal(ctx);
- break;
- }
+
+static bool gen_arith(DisasContext *ctx, arg_r *a,
+ void(*func)(TCGv, TCGv, TCGv))
+{
+ TCGv source1, source2;
+ source1 = tcg_temp_new();
+ source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ (*func)(source1, source1, source2);
+
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
+}
+
+static bool gen_shift(DisasContext *ctx, arg_r *a,
+ void(*func)(TCGv, TCGv, TCGv))
+{
+ TCGv source1 = tcg_temp_new();
+ TCGv source2 = tcg_temp_new();
+
+ gen_get_gpr(source1, a->rs1);
+ gen_get_gpr(source2, a->rs2);
+
+ tcg_gen_andi_tl(source2, source2, TARGET_LONG_BITS - 1);
+ (*func)(source1, source1, source2);
+
+ gen_set_gpr(a->rd, source1);
+ tcg_temp_free(source1);
+ tcg_temp_free(source2);
+ return true;
}
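
gen_shift masks the shift amount with TARGET_LONG_BITS - 1 because RISC-V register shifts consume only the low log2(XLEN) bits of rs2. In plain C, for RV64 (sketch, not part of the patch):

#include <stdint.h>

static uint64_t riscv_sll(uint64_t rs1, uint64_t rs2)
{
    return rs1 << (rs2 & 63);        /* rs2 & (TARGET_LONG_BITS - 1) */
}
/* e.g. riscv_sll(1, 64) == 1: a shift amount of 64 masks down to 0 */
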
+/* Include insn module translation functions */
+#include "insn_trans/trans_rvi.inc.c"
+#include "insn_trans/trans_rvm.inc.c"
+#include "insn_trans/trans_rva.inc.c"
+#include "insn_trans/trans_rvf.inc.c"
+#include "insn_trans/trans_rvd.inc.c"
+#include "insn_trans/trans_privileged.inc.c"
+
+bool decode_insn16(DisasContext *ctx, uint16_t insn);
+/* Include the auto-generated decoder for 16-bit insns */
+#include "decode_insn16.inc.c"
+#include "insn_trans/trans_rvc.inc.c"
+
static void decode_opc(DisasContext *ctx)
{
/* check for compressed insn */
@@ -2007,11 +659,16 @@ static void decode_opc(DisasContext *ctx)
gen_exception_illegal(ctx);
} else {
ctx->pc_succ_insn = ctx->base.pc_next + 2;
- decode_RV32_64C(ctx);
+ if (!decode_insn16(ctx, ctx->opcode)) {
+ /* fall back to old decoder */
+ decode_RV32_64C(ctx);
+ }
}
} else {
ctx->pc_succ_insn = ctx->base.pc_next + 4;
- decode_RV32_64G(ctx);
+ if (!decode_insn32(ctx, ctx->opcode)) {
+ gen_exception_illegal(ctx);
+ }
}
}
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index 3b50c7f188..6a3b7c2b89 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -356,6 +356,13 @@ $QEMU_IO -c "read -P 0x33 0 4k" "$TEST_IMG" | _filter_qemu_io
# Using snapshot=on with a non-existent TMPDIR
TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on
+# Using snapshot=on together with read-only=on
+echo "info block" |
+ run_qemu -drive file="$TEST_IMG",snapshot=on,read-only=on,if=none,id=$device_id |
+ _filter_qemu_io |
+ sed -e 's#"/[^"]*/vl\.[A-Za-z0-9]\{6\}"#SNAPSHOT_PATH#g'
+
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index b900935fbc..9f1cf22608 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -458,4 +458,13 @@ read 4096/4096 bytes at offset 0
Testing: -drive driver=null-co,snapshot=on
QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+Testing: -drive file=TEST_DIR/t.qcow2,snapshot=on,read-only=on,if=none,id=drive0
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) info block
+drive0 (NODE_NAME): json:{"backing": {"driver": "qcow2", "file": {"driver": "file", "filename": "TEST_DIR/t.qcow2"}}, "driver": "qcow2", "file": {"driver": "file", "filename": SNAPSHOT_PATH}} (qcow2, read-only)
+ Removable device: not locked, tray closed
+ Cache mode: writeback, ignore flushes
+ Backing file: TEST_DIR/t.qcow2 (chain depth: 1)
+(qemu) quit
+
*** done
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index 8c5c735dfd..c4743cc31c 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -530,4 +530,13 @@ read 4096/4096 bytes at offset 0
Testing: -drive driver=null-co,snapshot=on
QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+Testing: -drive file=TEST_DIR/t.qcow2,snapshot=on,read-only=on,if=none,id=drive0
+QEMU X.Y.Z monitor - type 'help' for more information
+(qemu) info block
+drive0 (NODE_NAME): json:{"backing": {"driver": "qcow2", "file": {"driver": "file", "filename": "TEST_DIR/t.qcow2"}}, "driver": "qcow2", "file": {"driver": "file", "filename": SNAPSHOT_PATH}} (qcow2, read-only)
+ Removable device: not locked, tray closed
+ Cache mode: writeback, ignore flushes
+ Backing file: TEST_DIR/t.qcow2 (chain depth: 1)
+(qemu) quit
+
*** done
diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 5aa1bf1bd6..80b356f7bb 100755
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -634,6 +634,119 @@ class TestIncrementalBackupBlkdebug(TestIncrementalBackupBase):
self.vm.shutdown()
self.check_backups()
+ def test_incremental_pause(self):
+ """
+ Test an incremental backup job that hits an I/O error, pauses, and is resumed to completion.
+ """
+
+ drive0 = self.drives[0]
+ # NB: The blkdebug script here looks for a "flush, read, read" pattern.
+ # The flush occurs in hmp_io_writes, the first read in device_add, and
+ # the last read during the block job.
+ result = self.vm.qmp('blockdev-add',
+ node_name=drive0['id'],
+ driver=drive0['fmt'],
+ file={
+ 'driver': 'blkdebug',
+ 'image': {
+ 'driver': 'file',
+ 'filename': drive0['file']
+ },
+ 'set-state': [{
+ 'event': 'flush_to_disk',
+ 'state': 1,
+ 'new_state': 2
+ },{
+ 'event': 'read_aio',
+ 'state': 2,
+ 'new_state': 3
+ }],
+ 'inject-error': [{
+ 'event': 'read_aio',
+ 'errno': 5,
+ 'state': 3,
+ 'immediately': False,
+ 'once': True
+ }],
+ })
+ self.assert_qmp(result, 'return', {})
+ self.create_anchor_backup(drive0)
+ bitmap = self.add_bitmap('bitmap0', drive0)
+
+ # Emulate guest activity
+ self.hmp_io_writes(drive0['id'], (('0xab', 0, 512),
+ ('0xfe', '16M', '256k'),
+ ('0x64', '32736k', '64k')))
+
+ # For the purposes of query-block visibility of bitmaps, add a drive
+ # frontend after we've written data; otherwise we can't use hmp-io
+ result = self.vm.qmp("device_add",
+ id="device0",
+ drive=drive0['id'],
+ driver="virtio-blk")
+ self.assert_qmp(result, 'return', {})
+
+ # Bitmap Status Check
+ query = self.vm.qmp('query-block')
+ ret = [bmap for bmap in query['return'][0]['dirty-bitmaps']
+ if bmap.get('name') == bitmap.name][0]
+ self.assert_qmp(ret, 'count', 458752)
+ self.assert_qmp(ret, 'granularity', 65536)
+ self.assert_qmp(ret, 'status', 'active')
+ self.assert_qmp(ret, 'busy', False)
+ self.assert_qmp(ret, 'recording', True)
+
+ # Start backup
+ parent, _ = bitmap.last_target()
+ target = self.prepare_backup(bitmap, parent)
+ res = self.vm.qmp('drive-backup',
+ job_id=bitmap.drive['id'],
+ device=bitmap.drive['id'],
+ sync='incremental',
+ bitmap=bitmap.name,
+ format=bitmap.drive['fmt'],
+ target=target,
+ mode='existing',
+ on_source_error='stop')
+ self.assert_qmp(res, 'return', {})
+
+ # Wait for the error
+ event = self.vm.event_wait(name="BLOCK_JOB_ERROR",
+ match={"data":{"device":bitmap.drive['id']}})
+ self.assert_qmp(event, 'data', {'device': bitmap.drive['id'],
+ 'action': 'stop',
+ 'operation': 'read'})
+
+ # Bitmap Status Check
+ query = self.vm.qmp('query-block')
+ ret = [bmap for bmap in query['return'][0]['dirty-bitmaps']
+ if bmap.get('name') == bitmap.name][0]
+ self.assert_qmp(ret, 'count', 458752)
+ self.assert_qmp(ret, 'granularity', 65536)
+ self.assert_qmp(ret, 'status', 'frozen')
+ self.assert_qmp(ret, 'busy', True)
+ self.assert_qmp(ret, 'recording', True)
+
+ # Resume and check incremental backup for consistency
+ res = self.vm.qmp('block-job-resume', device=bitmap.drive['id'])
+ self.assert_qmp(res, 'return', {})
+ self.wait_qmp_backup(bitmap.drive['id'])
+
+ # Bitmap Status Check
+ query = self.vm.qmp('query-block')
+ ret = [bmap for bmap in query['return'][0]['dirty-bitmaps']
+ if bmap.get('name') == bitmap.name][0]
+ self.assert_qmp(ret, 'count', 0)
+ self.assert_qmp(ret, 'granularity', 65536)
+ self.assert_qmp(ret, 'status', 'active')
+ self.assert_qmp(ret, 'busy', False)
+ self.assert_qmp(ret, 'recording', True)
+
+ # Finalize / Cleanup
+ self.make_reference_backup(bitmap)
+ self.vm.shutdown()
+ self.check_backups()
+
if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out
index e56cae021b..281b69efea 100644
--- a/tests/qemu-iotests/124.out
+++ b/tests/qemu-iotests/124.out
@@ -1,5 +1,5 @@
-...........
+............
----------------------------------------------------------------------
-Ran 11 tests
+Ran 12 tests
OK
diff --git a/tests/qemu-iotests/232 b/tests/qemu-iotests/232
index 71fd48eff0..0de097fc88 100755
--- a/tests/qemu-iotests/232
+++ b/tests/qemu-iotests/232
@@ -29,6 +29,7 @@ status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
+ rm -f $TEST_IMG.[01234]
}
trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -143,6 +144,36 @@ run_qemu_info_block -blockdev driver=file,filename="$TEST_IMG",node-name=node0,a
run_qemu_info_block -blockdev driver=file,filename="$TEST_IMG",node-name=node0,auto-read-only=on
run_qemu_info_block -blockdev driver=file,filename="$TEST_IMG",node-name=node0
+echo
+echo "=== Try commit to backing file with auto-read-only ==="
+echo
+
+TEST_IMG="$TEST_IMG.0" _make_test_img $size
+TEST_IMG="$TEST_IMG.1" _make_test_img $size
+TEST_IMG="$TEST_IMG.2" _make_test_img $size
+TEST_IMG="$TEST_IMG.3" _make_test_img $size
+TEST_IMG="$TEST_IMG.4" _make_test_img $size
+
+(cat <<EOF
+{"execute":"qmp_capabilities"}
+{"execute":"block-commit",
+ "arguments":{"device":"format-4", "top-node": "format-2", "base-node":"format-0", "job-id":"job0"}}
+EOF
+sleep 1
+echo '{"execute":"quit"}'
+) | $QEMU -qmp stdio -nographic -nodefaults \
+ -blockdev file,node-name=file-0,filename=$TEST_IMG.0,auto-read-only=on \
+ -blockdev qcow2,node-name=format-0,file=file-0,read-only=on \
+ -blockdev file,node-name=file-1,filename=$TEST_IMG.1,auto-read-only=on \
+ -blockdev qcow2,node-name=format-1,file=file-1,read-only=on,backing=format-0 \
+ -blockdev file,node-name=file-2,filename=$TEST_IMG.2,auto-read-only=on \
+ -blockdev qcow2,node-name=format-2,file=file-2,read-only=on,backing=format-1 \
+ -blockdev file,node-name=file-3,filename=$TEST_IMG.3,auto-read-only=on \
+ -blockdev qcow2,node-name=format-3,file=file-3,read-only=on,backing=format-2 \
+ -blockdev file,node-name=file-4,filename=$TEST_IMG.4,auto-read-only=on \
+ -blockdev qcow2,node-name=format-4,file=file-4,read-only=on,backing=format-3 |
+ _filter_qmp
+
# success, all done
echo "*** done"
rm -f $seq.full
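
Editor's note: for reference, the same intermediate commit can be driven from a Python iotests case instead of a shell pipe. This is a hedged sketch, not part of the patch, assuming an iotests-style VM handle 'vm' whose qmp() converts underscores in keyword names to dashes (so top_node becomes top-node on the wire):

    result = vm.qmp('block-commit',
                    job_id='job0',
                    device='format-4',      # top of the visible chain
                    top_node='format-2',    # first node to commit down
                    base_node='format-0')   # data lands here
    assert result == {'return': {}}
    # With auto-read-only=on, QEMU reopens format-0..2 read-write only
    # while the job runs, then drops them back to read-only.
    vm.event_wait(name='BLOCK_JOB_COMPLETED')
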
diff --git a/tests/qemu-iotests/232.out b/tests/qemu-iotests/232.out
index dcb683afa3..5bcc44bb62 100644
--- a/tests/qemu-iotests/232.out
+++ b/tests/qemu-iotests/232.out
@@ -22,12 +22,12 @@ NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
QEMU_PROG: -drive driver=file,file=TEST_DIR/t.IMGFMT,if=none,read-only=off,auto-read-only=off: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
-NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
-NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
+NODE_NAME: TEST_DIR/t.IMGFMT (file)
+NODE_NAME: TEST_DIR/t.IMGFMT (file)
QEMU_PROG: -drive driver=file,file=TEST_DIR/t.IMGFMT,if=none,auto-read-only=off: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
-NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
-NODE_NAME: TEST_DIR/t.IMGFMT (file, read-only)
+NODE_NAME: TEST_DIR/t.IMGFMT (file)
+NODE_NAME: TEST_DIR/t.IMGFMT (file)
=== -blockdev with read-write image: read-only/auto-read-only combinations ===
@@ -50,10 +50,30 @@ node0: TEST_DIR/t.IMGFMT (file, read-only)
node0: TEST_DIR/t.IMGFMT (file, read-only)
QEMU_PROG: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=node0,read-only=off,auto-read-only=off: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
-node0: TEST_DIR/t.IMGFMT (file, read-only)
+node0: TEST_DIR/t.IMGFMT (file)
QEMU_PROG: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=node0,read-only=off: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
QEMU_PROG: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=node0,auto-read-only=off: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
-node0: TEST_DIR/t.IMGFMT (file, read-only)
+node0: TEST_DIR/t.IMGFMT (file)
QEMU_PROG: -blockdev driver=file,filename=TEST_DIR/t.IMGFMT,node-name=node0: Could not open 'TEST_DIR/t.IMGFMT': Permission denied
+
+=== Try commit to backing file with auto-read-only ===
+
+Formatting 'TEST_DIR/t.IMGFMT.0', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.1', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=134217728
+Formatting 'TEST_DIR/t.IMGFMT.4', fmt=IMGFMT size=134217728
+QMP_VERSION
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
*** done
diff --git a/tests/qemu-iotests/236.out b/tests/qemu-iotests/236.out
index 5006f7bca1..815cd053f0 100644
--- a/tests/qemu-iotests/236.out
+++ b/tests/qemu-iotests/236.out
@@ -22,17 +22,21 @@ write -P0xcd 0x3ff0000 64k
"bitmaps": {
"drive0": [
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapB",
"persistent": false,
+ "recording": true,
"status": "active"
},
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapA",
"persistent": false,
+ "recording": true,
"status": "active"
}
]
@@ -84,17 +88,21 @@ write -P0xcd 0x3ff0000 64k
"bitmaps": {
"drive0": [
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapB",
"persistent": false,
+ "recording": true,
"status": "active"
},
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapA",
"persistent": false,
+ "recording": true,
"status": "active"
}
]
@@ -184,24 +192,30 @@ write -P0xea 0x3fe0000 64k
"bitmaps": {
"drive0": [
{
+ "busy": false,
"count": 393216,
"granularity": 65536,
"name": "bitmapC",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapB",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 458752,
"granularity": 65536,
"name": "bitmapA",
"persistent": false,
+ "recording": false,
"status": "disabled"
}
]
@@ -251,24 +265,30 @@ write -P0xea 0x3fe0000 64k
"bitmaps": {
"drive0": [
{
+ "busy": false,
"count": 393216,
"granularity": 65536,
"name": "bitmapC",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapB",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 458752,
"granularity": 65536,
"name": "bitmapA",
"persistent": false,
+ "recording": false,
"status": "disabled"
}
]
@@ -311,31 +331,39 @@ write -P0xea 0x3fe0000 64k
"bitmaps": {
"drive0": [
{
+ "busy": false,
"count": 458752,
"granularity": 65536,
"name": "bitmapD",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 393216,
"granularity": 65536,
"name": "bitmapC",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 262144,
"granularity": 65536,
"name": "bitmapB",
"persistent": false,
+ "recording": false,
"status": "disabled"
},
{
+ "busy": false,
"count": 458752,
"granularity": 65536,
"name": "bitmapA",
"persistent": false,
+ "recording": false,
"status": "disabled"
}
]
diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
new file mode 100644
index 0000000000..7891a210c1
--- /dev/null
+++ b/tests/qemu-iotests/245
@@ -0,0 +1,991 @@
+#!/usr/bin/env python
+#
+# Test cases for the QMP 'x-blockdev-reopen' command
+#
+# Copyright (C) 2018-2019 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import re
+import iotests
+import copy
+import json
+from iotests import qemu_img, qemu_io
+
+hd_path = [
+ os.path.join(iotests.test_dir, 'hd0.img'),
+ os.path.join(iotests.test_dir, 'hd1.img'),
+ os.path.join(iotests.test_dir, 'hd2.img')
+]
+
+def hd_opts(idx):
+ return {'driver': iotests.imgfmt,
+ 'node-name': 'hd%d' % idx,
+ 'file': {'driver': 'file',
+ 'node-name': 'hd%d-file' % idx,
+ 'filename': hd_path[idx] } }
+
+class TestBlockdevReopen(iotests.QMPTestCase):
+ total_io_cmds = 0
+
+ def setUp(self):
+ qemu_img('create', '-f', iotests.imgfmt, hd_path[0], '3M')
+ qemu_img('create', '-f', iotests.imgfmt, '-b', hd_path[0], hd_path[1])
+ qemu_img('create', '-f', iotests.imgfmt, hd_path[2], '3M')
+ qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xa0 0 1M', hd_path[0])
+ qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xa1 1M 1M', hd_path[1])
+ qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0xa2 2M 1M', hd_path[2])
+ self.vm = iotests.VM()
+ self.vm.launch()
+
+ def tearDown(self):
+ self.vm.shutdown()
+ self.check_qemu_io_errors()
+ os.remove(hd_path[0])
+ os.remove(hd_path[1])
+ os.remove(hd_path[2])
+
+ # The output of qemu-io is not returned by vm.hmp_qemu_io() but
+ # it's stored in the log and can only be read when the VM has been
+ # shut down. This function runs qemu-io and keeps track of the
+ # number of times it's been called.
+ def run_qemu_io(self, img, cmd):
+ result = self.vm.hmp_qemu_io(img, cmd)
+ self.assert_qmp(result, 'return', '')
+ self.total_io_cmds += 1
+
+ # Once the VM is shut down we can parse the log and see if qemu-io
+ # ran without errors.
+ def check_qemu_io_errors(self):
+ self.assertFalse(self.vm.is_running())
+ found = 0
+ log = self.vm.get_log()
+ for line in log.split("\n"):
+ if line.startswith("Pattern verification failed"):
+ raise Exception("%s (command #%d)" % (line, found))
+ if re.match("read .*/.* bytes at offset", line):
+ found += 1
+        self.assertEqual(found, self.total_io_cmds,
+                         "Expected output of %d qemu-io commands, found %d" %
+                         (self.total_io_cmds, found))
+
+ # Run x-blockdev-reopen with 'opts' but applying 'newopts'
+ # on top of it. The original 'opts' dict is unmodified
+ def reopen(self, opts, newopts = {}, errmsg = None):
+ opts = copy.deepcopy(opts)
+
+ # Apply the changes from 'newopts' on top of 'opts'
+ for key in newopts:
+ value = newopts[key]
+ # If key has the form "foo.bar" then we need to do
+ # opts["foo"]["bar"] = value, not opts["foo.bar"] = value
+ subdict = opts
+ while key.find('.') != -1:
+ [prefix, key] = key.split('.', 1)
+            subdict = subdict[prefix]
+ subdict[key] = value
+
+ result = self.vm.qmp('x-blockdev-reopen', conv_keys = False, **opts)
+ if errmsg:
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', errmsg)
+ else:
+ self.assert_qmp(result, 'return', {})
+
+
+ # Run query-named-block-nodes and return the specified entry
+ def get_node(self, node_name):
+ result = self.vm.qmp('query-named-block-nodes')
+ for node in result['return']:
+ if node['node-name'] == node_name:
+ return node
+ return None
+
+ # Run 'query-named-block-nodes' and compare its output with the
+ # value passed by the user in 'graph'
+ def check_node_graph(self, graph):
+ result = self.vm.qmp('query-named-block-nodes')
+ self.assertEqual(json.dumps(graph, sort_keys=True),
+ json.dumps(result, sort_keys=True))
+
+    # This test opens a single disk image (without backing files)
+    # and tries to reopen it with illegal / incorrect parameters.
+ def test_incorrect_parameters_single_file(self):
+ # Open 'hd0' only (no backing files)
+ opts = hd_opts(0)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+ original_graph = self.vm.qmp('query-named-block-nodes')
+
+ # We can reopen the image passing the same options
+ self.reopen(opts)
+
+ # We can also reopen passing a child reference in 'file'
+ self.reopen(opts, {'file': 'hd0-file'})
+
+ # We cannot change any of these
+ self.reopen(opts, {'node-name': 'not-found'}, "Cannot find node named 'not-found'")
+ self.reopen(opts, {'node-name': ''}, "Cannot find node named ''")
+ self.reopen(opts, {'node-name': None}, "Invalid parameter type for 'node-name', expected: string")
+ self.reopen(opts, {'driver': 'raw'}, "Cannot change the option 'driver'")
+ self.reopen(opts, {'driver': ''}, "Invalid parameter ''")
+ self.reopen(opts, {'driver': None}, "Invalid parameter type for 'driver', expected: string")
+ self.reopen(opts, {'file': 'not-found'}, "Cannot change the option 'file'")
+ self.reopen(opts, {'file': ''}, "Cannot change the option 'file'")
+ self.reopen(opts, {'file': None}, "Invalid parameter type for 'file', expected: BlockdevRef")
+ self.reopen(opts, {'file.node-name': 'newname'}, "Cannot change the option 'node-name'")
+ self.reopen(opts, {'file.driver': 'host_device'}, "Cannot change the option 'driver'")
+ self.reopen(opts, {'file.filename': hd_path[1]}, "Cannot change the option 'filename'")
+ self.reopen(opts, {'file.aio': 'native'}, "Cannot change the option 'aio'")
+ self.reopen(opts, {'file.locking': 'off'}, "Cannot change the option 'locking'")
+ self.reopen(opts, {'file.filename': None}, "Invalid parameter type for 'file.filename', expected: string")
+
+ # node-name is optional in BlockdevOptions, but x-blockdev-reopen needs it
+ del opts['node-name']
+ self.reopen(opts, {}, "Node name not specified")
+
+ # Check that nothing has changed
+ self.check_node_graph(original_graph)
+
+ # Remove the node
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ # This test opens an image with a backing file and tries to reopen
+ # it with illegal / incorrect parameters.
+ def test_incorrect_parameters_backing_file(self):
+ # Open hd1 omitting the backing options (hd0 will be opened
+ # with the default options)
+ opts = hd_opts(1)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+ original_graph = self.vm.qmp('query-named-block-nodes')
+
+        # We can't reopen the image passing the same options: 'backing' is mandatory
+ self.reopen(opts, {}, "backing is missing for 'hd1'")
+
+ # Everything works if we pass 'backing' using the existing node name
+ for node in original_graph['return']:
+ if node['drv'] == iotests.imgfmt and node['file'] == hd_path[0]:
+ backing_node_name = node['node-name']
+ self.reopen(opts, {'backing': backing_node_name})
+
+        # We can't use a non-existent node, or an empty (but non-null)
+        # node name, as the backing image
+ self.reopen(opts, {'backing': 'not-found'}, "Cannot find device= nor node_name=not-found")
+ self.reopen(opts, {'backing': ''}, "Cannot find device= nor node_name=")
+
+ # We can reopen the image just fine if we specify the backing options
+ opts['backing'] = {'driver': iotests.imgfmt,
+ 'file': {'driver': 'file',
+ 'filename': hd_path[0]}}
+ self.reopen(opts)
+
+ # We cannot change any of these options
+ self.reopen(opts, {'backing.node-name': 'newname'}, "Cannot change the option 'node-name'")
+ self.reopen(opts, {'backing.driver': 'raw'}, "Cannot change the option 'driver'")
+ self.reopen(opts, {'backing.file.node-name': 'newname'}, "Cannot change the option 'node-name'")
+ self.reopen(opts, {'backing.file.driver': 'host_device'}, "Cannot change the option 'driver'")
+
+ # Check that nothing has changed since the beginning
+ self.check_node_graph(original_graph)
+
+ # Remove the node
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd1')
+ self.assert_qmp(result, 'return', {})
+
+ # Reopen an image several times changing some of its options
+ def test_reopen(self):
+ # Open the hd1 image passing all backing options
+ opts = hd_opts(1)
+ opts['backing'] = hd_opts(0)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+ original_graph = self.vm.qmp('query-named-block-nodes')
+
+ # We can reopen the image passing the same options
+ self.reopen(opts)
+
+ # Reopen in read-only mode
+ self.assert_qmp(self.get_node('hd1'), 'ro', False)
+
+ self.reopen(opts, {'read-only': True})
+ self.assert_qmp(self.get_node('hd1'), 'ro', True)
+ self.reopen(opts)
+ self.assert_qmp(self.get_node('hd1'), 'ro', False)
+
+ # Change the cache options
+ self.assert_qmp(self.get_node('hd1'), 'cache/writeback', True)
+ self.assert_qmp(self.get_node('hd1'), 'cache/direct', False)
+ self.assert_qmp(self.get_node('hd1'), 'cache/no-flush', False)
+ self.reopen(opts, {'cache': { 'direct': True, 'no-flush': True }})
+ self.assert_qmp(self.get_node('hd1'), 'cache/writeback', True)
+ self.assert_qmp(self.get_node('hd1'), 'cache/direct', True)
+ self.assert_qmp(self.get_node('hd1'), 'cache/no-flush', True)
+
+ # Reopen again with the original options
+ self.reopen(opts)
+ self.assert_qmp(self.get_node('hd1'), 'cache/writeback', True)
+ self.assert_qmp(self.get_node('hd1'), 'cache/direct', False)
+ self.assert_qmp(self.get_node('hd1'), 'cache/no-flush', False)
+
+ # Change 'detect-zeroes' and 'discard'
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'off')
+ self.reopen(opts, {'detect-zeroes': 'on'})
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'on')
+ self.reopen(opts, {'detect-zeroes': 'unmap'},
+ "setting detect-zeroes to unmap is not allowed " +
+ "without setting discard operation to unmap")
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'on')
+ self.reopen(opts, {'detect-zeroes': 'unmap', 'discard': 'unmap'})
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'unmap')
+ self.reopen(opts)
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'off')
+
+ # Changing 'force-share' is currently not supported
+ self.reopen(opts, {'force-share': True}, "Cannot change the option 'force-share'")
+
+ # Change some qcow2-specific options
+ # No way to test for success other than checking the return message
+ if iotests.imgfmt == 'qcow2':
+ self.reopen(opts, {'l2-cache-entry-size': 128 * 1024},
+ "L2 cache entry size must be a power of two "+
+ "between 512 and the cluster size (65536)")
+ self.reopen(opts, {'l2-cache-size': 1024 * 1024,
+ 'cache-size': 512 * 1024},
+ "l2-cache-size may not exceed cache-size")
+ self.reopen(opts, {'l2-cache-size': 4 * 1024 * 1024,
+ 'refcount-cache-size': 4 * 1024 * 1024,
+ 'l2-cache-entry-size': 32 * 1024})
+ self.reopen(opts, {'pass-discard-request': True})
+
+ # Check that nothing has changed since the beginning
+ # (from the parameters that we can check)
+ self.check_node_graph(original_graph)
+
+ # Check that the node names (other than the top-level one) are optional
+ del opts['file']['node-name']
+ del opts['backing']['node-name']
+ del opts['backing']['file']['node-name']
+ self.reopen(opts)
+ self.check_node_graph(original_graph)
+
+ # Reopen setting backing = null, this removes the backing image from the chain
+ self.reopen(opts, {'backing': None})
+ self.assert_qmp_absent(self.get_node('hd1'), 'image/backing-image')
+
+ # Open the 'hd0' image
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **hd_opts(0))
+ self.assert_qmp(result, 'return', {})
+
+ # Reopen the hd1 image setting 'hd0' as its backing image
+ self.reopen(opts, {'backing': 'hd0'})
+ self.assert_qmp(self.get_node('hd1'), 'image/backing-image/filename', hd_path[0])
+
+ # Check that nothing has changed since the beginning
+ self.reopen(hd_opts(0), {'read-only': True})
+ self.check_node_graph(original_graph)
+
+        # The backing file (hd0) is now a reference; we cannot change backing.* anymore
+ self.reopen(opts, {}, "Cannot change the option 'backing.driver'")
+
+ # We can't remove 'hd0' while it's a backing image of 'hd1'
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "Node 'hd0' is busy: node is used as backing hd of 'hd1'")
+
+ # But we can remove both nodes if done in the proper order
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd1')
+ self.assert_qmp(result, 'return', {})
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ # Reopen a raw image and see the effect of changing the 'offset' option
+ def test_reopen_raw(self):
+ opts = {'driver': 'raw', 'node-name': 'hd0',
+ 'file': { 'driver': 'file',
+ 'filename': hd_path[0],
+ 'node-name': 'hd0-file' } }
+
+ # First we create a 2MB raw file, and fill each half with a
+ # different value
+ qemu_img('create', '-f', 'raw', hd_path[0], '2M')
+ qemu_io('-f', 'raw', '-c', 'write -P 0xa0 0 1M', hd_path[0])
+ qemu_io('-f', 'raw', '-c', 'write -P 0xa1 1M 1M', hd_path[0])
+
+ # Open the raw file with QEMU
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # Read 1MB from offset 0
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+
+ # Reopen the image with a 1MB offset.
+ # Now the results are different
+ self.reopen(opts, {'offset': 1024*1024})
+ self.run_qemu_io("hd0", "read -P 0xa1 0 1M")
+
+ # Reopen again with the original options.
+ # We get the original results again
+ self.reopen(opts)
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+
+ # Remove the block device
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ # Omitting an option should reset it to the default value, but if
+ # an option cannot be changed it shouldn't be possible to reset it
+ # to its default value either
+ def test_reset_default_values(self):
+ opts = {'driver': 'qcow2', 'node-name': 'hd0',
+ 'file': { 'driver': 'file',
+ 'filename': hd_path[0],
+ 'x-check-cache-dropped': True, # This one can be changed
+ 'locking': 'off', # This one can NOT be changed
+ 'node-name': 'hd0-file' } }
+
+ # Open the file with QEMU
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # file.x-check-cache-dropped can be changed...
+ self.reopen(opts, { 'file.x-check-cache-dropped': False })
+ # ...and dropped completely (resetting to the default value)
+ del opts['file']['x-check-cache-dropped']
+ self.reopen(opts)
+
+ # file.locking cannot be changed nor reset to the default value
+ self.reopen(opts, { 'file.locking': 'on' }, "Cannot change the option 'locking'")
+ del opts['file']['locking']
+ self.reopen(opts, {}, "Option 'locking' cannot be reset to its default value")
+ # But we can reopen it if we maintain its previous value
+ self.reopen(opts, { 'file.locking': 'off' })
+
+ # Remove the block device
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ # This test modifies the node graph a few times by changing the
+ # 'backing' option on reopen and verifies that the guest data that
+ # is read afterwards is consistent with the graph changes.
+ def test_io_with_graph_changes(self):
+ opts = []
+
+ # Open hd0, hd1 and hd2 without any backing image
+ for i in range(3):
+ opts.append(hd_opts(i))
+ opts[i]['backing'] = None
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts[i])
+ self.assert_qmp(result, 'return', {})
+
+ # hd0
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+ self.run_qemu_io("hd0", "read -P 0 1M 1M")
+ self.run_qemu_io("hd0", "read -P 0 2M 1M")
+
+ # hd1 <- hd0
+ self.reopen(opts[0], {'backing': 'hd1'})
+
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+ self.run_qemu_io("hd0", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd0", "read -P 0 2M 1M")
+
+ # hd1 <- hd0 , hd1 <- hd2
+ self.reopen(opts[2], {'backing': 'hd1'})
+
+ self.run_qemu_io("hd2", "read -P 0 0 1M")
+ self.run_qemu_io("hd2", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd2", "read -P 0xa2 2M 1M")
+
+ # hd1 <- hd2 <- hd0
+ self.reopen(opts[0], {'backing': 'hd2'})
+
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+ self.run_qemu_io("hd0", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd0", "read -P 0xa2 2M 1M")
+
+ # hd2 <- hd0
+ self.reopen(opts[2], {'backing': None})
+
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+ self.run_qemu_io("hd0", "read -P 0 1M 1M")
+ self.run_qemu_io("hd0", "read -P 0xa2 2M 1M")
+
+ # hd2 <- hd1 <- hd0
+ self.reopen(opts[1], {'backing': 'hd2'})
+ self.reopen(opts[0], {'backing': 'hd1'})
+
+ self.run_qemu_io("hd0", "read -P 0xa0 0 1M")
+ self.run_qemu_io("hd0", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd0", "read -P 0xa2 2M 1M")
+
+ # Illegal operation: hd2 is a child of hd1
+ self.reopen(opts[2], {'backing': 'hd1'},
+ "Making 'hd1' a backing file of 'hd2' would create a cycle")
+
+ # hd2 <- hd0, hd2 <- hd1
+ self.reopen(opts[0], {'backing': 'hd2'})
+
+ self.run_qemu_io("hd1", "read -P 0 0 1M")
+ self.run_qemu_io("hd1", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd1", "read -P 0xa2 2M 1M")
+
+ # More illegal operations
+ self.reopen(opts[2], {'backing': 'hd1'},
+ "Making 'hd1' a backing file of 'hd2' would create a cycle")
+ self.reopen(opts[2], {'file': 'hd0-file'}, "Cannot change the option 'file'")
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd2')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "Node 'hd2' is busy: node is used as backing hd of 'hd0'")
+
+ # hd1 doesn't have a backing file now
+ self.reopen(opts[1], {'backing': None})
+
+ self.run_qemu_io("hd1", "read -P 0 0 1M")
+ self.run_qemu_io("hd1", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd1", "read -P 0 2M 1M")
+
+ # We can't remove the 'backing' option if the image has a
+ # default backing file
+ del opts[1]['backing']
+ self.reopen(opts[1], {}, "backing is missing for 'hd1'")
+
+ self.run_qemu_io("hd1", "read -P 0 0 1M")
+ self.run_qemu_io("hd1", "read -P 0xa1 1M 1M")
+ self.run_qemu_io("hd1", "read -P 0 2M 1M")
+
+ # This test verifies that we can't change the children of a block
+ # device during a reopen operation in a way that would create
+ # cycles in the node graph
+ def test_graph_cycles(self):
+ opts = []
+
+ # Open all three images without backing file
+ for i in range(3):
+ opts.append(hd_opts(i))
+ opts[i]['backing'] = None
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts[i])
+ self.assert_qmp(result, 'return', {})
+
+ # hd1 <- hd0, hd1 <- hd2
+ self.reopen(opts[0], {'backing': 'hd1'})
+ self.reopen(opts[2], {'backing': 'hd1'})
+
+ # Illegal: hd2 is backed by hd1
+ self.reopen(opts[1], {'backing': 'hd2'},
+ "Making 'hd2' a backing file of 'hd1' would create a cycle")
+
+ # hd1 <- hd0 <- hd2
+ self.reopen(opts[2], {'backing': 'hd0'})
+
+ # Illegal: hd2 is backed by hd0, which is backed by hd1
+ self.reopen(opts[1], {'backing': 'hd2'},
+ "Making 'hd2' a backing file of 'hd1' would create a cycle")
+
+ # Illegal: hd1 cannot point to itself
+ self.reopen(opts[1], {'backing': 'hd1'},
+ "Making 'hd1' a backing file of 'hd1' would create a cycle")
+
+ # Remove all backing files
+ self.reopen(opts[0])
+ self.reopen(opts[2])
+
+ ##########################################
+ # Add a blkverify node using hd0 and hd1 #
+ ##########################################
+ bvopts = {'driver': 'blkverify',
+ 'node-name': 'bv',
+ 'test': 'hd0',
+ 'raw': 'hd1'}
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **bvopts)
+ self.assert_qmp(result, 'return', {})
+
+ # blkverify doesn't currently allow reopening. TODO: implement this
+ self.reopen(bvopts, {}, "Block format 'blkverify' used by node 'bv'" +
+ " does not support reopening files")
+
+ # Illegal: hd0 is a child of the blkverify node
+ self.reopen(opts[0], {'backing': 'bv'},
+ "Making 'bv' a backing file of 'hd0' would create a cycle")
+
+ # Delete the blkverify node
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'bv')
+ self.assert_qmp(result, 'return', {})
+
+ # Misc reopen tests with different block drivers
+ def test_misc_drivers(self):
+ ####################
+ ###### quorum ######
+ ####################
+ for i in range(3):
+ opts = hd_opts(i)
+ # Open all three images without backing file
+ opts['backing'] = None
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ opts = {'driver': 'quorum',
+ 'node-name': 'quorum0',
+ 'children': ['hd0', 'hd1', 'hd2'],
+ 'vote-threshold': 2}
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # Quorum doesn't currently allow reopening. TODO: implement this
+ self.reopen(opts, {}, "Block format 'quorum' used by node 'quorum0'" +
+ " does not support reopening files")
+
+ # You can't make quorum0 a backing file of hd0:
+ # hd0 is already a child of quorum0.
+ self.reopen(hd_opts(0), {'backing': 'quorum0'},
+ "Making 'quorum0' a backing file of 'hd0' would create a cycle")
+
+ # Delete quorum0
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'quorum0')
+ self.assert_qmp(result, 'return', {})
+
+ # Delete hd0, hd1 and hd2
+ for i in range(3):
+ result = self.vm.qmp('blockdev-del', conv_keys = True,
+ node_name = 'hd%d' % i)
+ self.assert_qmp(result, 'return', {})
+
+ ######################
+ ###### blkdebug ######
+ ######################
+ opts = {'driver': 'blkdebug',
+ 'node-name': 'bd',
+ 'config': '/dev/null',
+ 'image': hd_opts(0)}
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # blkdebug allows reopening if we keep the same options
+ self.reopen(opts)
+
+ # but it currently does not allow changes
+ self.reopen(opts, {'image': 'hd1'}, "Cannot change the option 'image'")
+ self.reopen(opts, {'align': 33554432}, "Cannot change the option 'align'")
+ self.reopen(opts, {'config': '/non/existent'}, "Cannot change the option 'config'")
+ del opts['config']
+ self.reopen(opts, {}, "Option 'config' cannot be reset to its default value")
+
+ # Delete the blkdebug node
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'bd')
+ self.assert_qmp(result, 'return', {})
+
+ ##################
+ ###### null ######
+ ##################
+ opts = {'driver': 'null-aio', 'node-name': 'root', 'size': 1024}
+
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+        # 1 << 30 is the driver's default size, but even setting 'size'
+        # to that value explicitly counts as a change and is rejected
+ self.reopen(opts, {'size': (1 << 30)}, "Cannot change the option 'size'")
+
+ # We cannot change 'size' back to its default value either
+ del opts['size']
+ self.reopen(opts, {}, "Option 'size' cannot be reset to its default value")
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'root')
+ self.assert_qmp(result, 'return', {})
+
+ ##################
+ ###### file ######
+ ##################
+ opts = hd_opts(0)
+ opts['file']['locking'] = 'on'
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # 'locking' cannot be changed
+ del opts['file']['locking']
+ self.reopen(opts, {'file.locking': 'on'})
+ self.reopen(opts, {'file.locking': 'off'}, "Cannot change the option 'locking'")
+ self.reopen(opts, {}, "Option 'locking' cannot be reset to its default value")
+
+ # Trying to reopen the 'file' node directly does not make a difference
+ opts = opts['file']
+ self.reopen(opts, {'locking': 'on'})
+ self.reopen(opts, {'locking': 'off'}, "Cannot change the option 'locking'")
+ self.reopen(opts, {}, "Option 'locking' cannot be reset to its default value")
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ ######################
+ ###### throttle ######
+ ######################
+ opts = { 'qom-type': 'throttle-group', 'id': 'group0',
+ 'props': { 'limits': { 'iops-total': 1000 } } }
+ result = self.vm.qmp('object-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ opts = { 'qom-type': 'throttle-group', 'id': 'group1',
+ 'props': { 'limits': { 'iops-total': 2000 } } }
+ result = self.vm.qmp('object-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # Add a throttle filter with group = group0
+ opts = { 'driver': 'throttle', 'node-name': 'throttle0',
+ 'throttle-group': 'group0', 'file': hd_opts(0) }
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # We can reopen it if we keep the same options
+ self.reopen(opts)
+
+ # We can also reopen if 'file' is a reference to the child
+ self.reopen(opts, {'file': 'hd0'})
+
+ # This is illegal
+ self.reopen(opts, {'throttle-group': 'notfound'}, "Throttle group 'notfound' does not exist")
+
+ # But it's possible to change the group to group1
+ self.reopen(opts, {'throttle-group': 'group1'})
+
+ # Now group1 is in use, it cannot be deleted
+ result = self.vm.qmp('object-del', id = 'group1')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "object 'group1' is in use, can not be deleted")
+
+ # Default options, this switches the group back to group0
+ self.reopen(opts)
+
+ # So now we cannot delete group0
+ result = self.vm.qmp('object-del', id = 'group0')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "object 'group0' is in use, can not be deleted")
+
+ # But group1 is free this time, and it can be deleted
+ result = self.vm.qmp('object-del', id = 'group1')
+ self.assert_qmp(result, 'return', {})
+
+ # Let's delete the filter node
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'throttle0')
+ self.assert_qmp(result, 'return', {})
+
+ # And we can finally get rid of group0
+ result = self.vm.qmp('object-del', id = 'group0')
+ self.assert_qmp(result, 'return', {})
+
+ # If an image has a backing file then the 'backing' option must be
+ # passed on reopen. We don't allow leaving the option out in this
+ # case because it's unclear what the correct semantics would be.
+ def test_missing_backing_options_1(self):
+ # hd2
+ opts = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # hd0
+ opts = hd_opts(0)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # hd0 has no backing file: we can omit the 'backing' option
+ self.reopen(opts)
+
+ # hd2 <- hd0
+ self.reopen(opts, {'backing': 'hd2'})
+
+ # hd0 has a backing file: we must set the 'backing' option
+ self.reopen(opts, {}, "backing is missing for 'hd0'")
+
+ # hd2 can't be removed because it's the backing file of hd0
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd2')
+ self.assert_qmp(result, 'error/class', 'GenericError')
+ self.assert_qmp(result, 'error/desc', "Node 'hd2' is busy: node is used as backing hd of 'hd0'")
+
+ # Detach hd2 from hd0.
+ self.reopen(opts, {'backing': None})
+ self.reopen(opts, {}, "backing is missing for 'hd0'")
+
+ # Remove both hd0 and hd2
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd2')
+ self.assert_qmp(result, 'return', {})
+
+    # If an image has a default backing file (as part of its metadata)
+ # then the 'backing' option must be passed on reopen. We don't
+ # allow leaving the option out in this case because it's unclear
+ # what the correct semantics would be.
+ def test_missing_backing_options_2(self):
+ # hd0 <- hd1
+ # (hd0 is hd1's default backing file)
+ opts = hd_opts(1)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # hd1 has a backing file: we can't omit the 'backing' option
+ self.reopen(opts, {}, "backing is missing for 'hd1'")
+
+ # Let's detach the backing file
+ self.reopen(opts, {'backing': None})
+
+ # No backing file attached to hd1 now, but we still can't omit the 'backing' option
+ self.reopen(opts, {}, "backing is missing for 'hd1'")
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd1')
+ self.assert_qmp(result, 'return', {})
+
+ # Test that making 'backing' a reference to an existing child
+ # keeps its current options
+ def test_backing_reference(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ # Enable 'detect-zeroes' on all three nodes
+ opts['detect-zeroes'] = 'on'
+ opts['backing']['detect-zeroes'] = 'on'
+ opts['backing']['backing']['detect-zeroes'] = 'on'
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+        # Reopen the chain passing the minimum set of required options.
+ # By making 'backing' a reference to hd1 (instead of a sub-dict)
+ # we tell QEMU to keep its current set of options.
+ opts = {'driver': iotests.imgfmt,
+ 'node-name': 'hd0',
+ 'file': 'hd0-file',
+ 'backing': 'hd1' }
+ self.reopen(opts)
+
+ # This has reset 'detect-zeroes' on hd0, but not on hd1 and hd2.
+ self.assert_qmp(self.get_node('hd0'), 'detect_zeroes', 'off')
+ self.assert_qmp(self.get_node('hd1'), 'detect_zeroes', 'on')
+ self.assert_qmp(self.get_node('hd2'), 'detect_zeroes', 'on')
+
+ # Test what happens if the graph changes due to other operations
+ # such as block-stream
+ def test_block_stream_1(self):
+ # hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = None
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # Stream hd1 into hd0 and wait until it's done
+ result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0', device = 'hd0')
+ self.assert_qmp(result, 'return', {})
+ self.wait_until_completed(drive = 'stream0')
+
+ # Now we have only hd0
+ self.assertEqual(self.get_node('hd1'), None)
+
+ # We have backing.* options but there's no backing file anymore
+ self.reopen(opts, {}, "Cannot change the option 'backing.driver'")
+
+ # If we remove the 'backing' option then we can reopen hd0 just fine
+ del opts['backing']
+ self.reopen(opts)
+
+ # We can also reopen hd0 if we set 'backing' to null
+ self.reopen(opts, {'backing': None})
+
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+
+ # Another block_stream test
+ def test_block_stream_2(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # Stream hd1 into hd0 and wait until it's done
+ result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0',
+ device = 'hd0', base_node = 'hd2')
+ self.assert_qmp(result, 'return', {})
+ self.wait_until_completed(drive = 'stream0')
+
+ # The chain is hd2 <- hd0 now. hd1 is missing
+ self.assertEqual(self.get_node('hd1'), None)
+
+ # The backing options in the dict were meant for hd1, but we cannot
+ # use them with hd2 because hd1 had a backing file while hd2 does not.
+ self.reopen(opts, {}, "Cannot change the option 'backing.driver'")
+
+ # If we remove hd1's options from the dict then things work fine
+ opts['backing'] = opts['backing']['backing']
+ self.reopen(opts)
+
+ # We can also reopen hd0 if we use a reference to the backing file
+ self.reopen(opts, {'backing': 'hd2'})
+
+ # But we cannot leave the option out
+ del opts['backing']
+ self.reopen(opts, {}, "backing is missing for 'hd0'")
+
+ # Now we can delete hd0 (and hd2)
+ result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')
+ self.assert_qmp(result, 'return', {})
+ self.assertEqual(self.get_node('hd2'), None)
+
+ # Reopen the chain during a block-stream job (from hd1 to hd0)
+ def test_block_stream_3(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # hd2 <- hd0
+ result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0',
+ device = 'hd0', base_node = 'hd2', speed = 512 * 1024)
+ self.assert_qmp(result, 'return', {})
+
+ # We can't remove hd2 while the stream job is ongoing
+ opts['backing']['backing'] = None
+ self.reopen(opts, {}, "Cannot change 'backing' link from 'hd1' to 'hd2'")
+
+ # We can't remove hd1 while the stream job is ongoing
+ opts['backing'] = None
+ self.reopen(opts, {}, "Cannot change 'backing' link from 'hd0' to 'hd1'")
+
+ self.wait_until_completed(drive = 'stream0')
+
+ # Reopen the chain during a block-stream job (from hd2 to hd1)
+ def test_block_stream_4(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ # hd1 <- hd0
+ result = self.vm.qmp('block-stream', conv_keys = True, job_id = 'stream0',
+ device = 'hd1', speed = 512 * 1024)
+ self.assert_qmp(result, 'return', {})
+
+ # We can't reopen with the original options because that would
+ # make hd1 read-only and block-stream requires it to be read-write
+ self.reopen(opts, {}, "Can't set node 'hd1' to r/o with copy-on-read enabled")
+
+ # We can't remove hd2 while the stream job is ongoing
+ opts['backing']['backing'] = None
+ self.reopen(opts, {'backing.read-only': False}, "Cannot change 'backing' link from 'hd1' to 'hd2'")
+
+ # We can detach hd1 from hd0 because it doesn't affect the stream job
+ opts['backing'] = None
+ self.reopen(opts)
+
+ self.wait_until_completed(drive = 'stream0')
+
+ # Reopen the chain during a block-commit job (from hd0 to hd2)
+ def test_block_commit_1(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0',
+ device = 'hd0', speed = 1024 * 1024)
+ self.assert_qmp(result, 'return', {})
+
+ # We can't remove hd2 while the commit job is ongoing
+ opts['backing']['backing'] = None
+ self.reopen(opts, {}, "Cannot change 'backing' link from 'hd1' to 'hd2'")
+
+ # We can't remove hd1 while the commit job is ongoing
+ opts['backing'] = None
+ self.reopen(opts, {}, "Cannot change 'backing' link from 'hd0' to 'hd1'")
+
+ event = self.vm.event_wait(name='BLOCK_JOB_READY')
+ self.assert_qmp(event, 'data/device', 'commit0')
+ self.assert_qmp(event, 'data/type', 'commit')
+ self.assert_qmp_absent(event, 'data/error')
+
+ result = self.vm.qmp('block-job-complete', device='commit0')
+ self.assert_qmp(result, 'return', {})
+
+ self.wait_until_completed(drive = 'commit0')
+
+ # Reopen the chain during a block-commit job (from hd1 to hd2)
+ def test_block_commit_2(self):
+ # hd2 <- hd1 <- hd0
+ opts = hd_opts(0)
+ opts['backing'] = hd_opts(1)
+ opts['backing']['backing'] = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('block-commit', conv_keys = True, job_id = 'commit0',
+ device = 'hd0', top_node = 'hd1', speed = 1024 * 1024)
+ self.assert_qmp(result, 'return', {})
+
+ # We can't remove hd2 while the commit job is ongoing
+ opts['backing']['backing'] = None
+ self.reopen(opts, {}, "Cannot change the option 'backing.driver'")
+
+ # We can't remove hd1 while the commit job is ongoing
+ opts['backing'] = None
+ self.reopen(opts, {}, "Cannot change backing link if 'hd0' has an implicit backing file")
+
+ # hd2 <- hd0
+ self.wait_until_completed(drive = 'commit0')
+
+ self.assert_qmp(self.get_node('hd0'), 'ro', False)
+ self.assertEqual(self.get_node('hd1'), None)
+ self.assert_qmp(self.get_node('hd2'), 'ro', True)
+
+ # We don't allow setting a backing file that uses a different AioContext
+ def test_iothreads(self):
+ opts = hd_opts(0)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts)
+ self.assert_qmp(result, 'return', {})
+
+ opts2 = hd_opts(2)
+ result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2)
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('object-add', qom_type='iothread', id='iothread0')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('object-add', qom_type='iothread', id='iothread1')
+ self.assert_qmp(result, 'return', {})
+
+ result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd0', iothread='iothread0')
+ self.assert_qmp(result, 'return', {})
+
+ self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext")
+
+ result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread1')
+ self.assert_qmp(result, 'return', {})
+
+ self.reopen(opts, {'backing': 'hd2'}, "Cannot use a new backing file with a different AioContext")
+
+ result = self.vm.qmp('x-blockdev-set-iothread', node_name='hd2', iothread='iothread0')
+ self.assert_qmp(result, 'return', {})
+
+ self.reopen(opts, {'backing': 'hd2'})
+
+if __name__ == '__main__':
+ iotests.main(supported_fmts=["qcow2"])
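
Editor's note: the dotted-key handling inside reopen() is the one piece of non-obvious logic in this file, so here it is as a standalone sketch (merge_opts is a hypothetical name, not part of the patch). Note the descent through subdict rather than opts, which is what makes multi-dot keys such as 'backing.file.driver' land in the right nested dict:

    import copy

    def merge_opts(opts, newopts):
        # Deep-copy so the caller's dict stays unmodified, matching the
        # contract of the reopen() helper above.
        opts = copy.deepcopy(opts)
        for key, value in newopts.items():
            subdict = opts
            # One level of descent per dot: "backing.file.driver"
            # becomes opts['backing']['file']['driver'] = value.
            while '.' in key:
                prefix, key = key.split('.', 1)
                subdict = subdict[prefix]
            subdict[key] = value
        return opts

    merged = merge_opts({'file': {'driver': 'file', 'locking': 'off'}},
                        {'file.driver': 'host_device'})
    assert merged == {'file': {'driver': 'host_device', 'locking': 'off'}}
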
diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out
new file mode 100644
index 0000000000..71009c239f
--- /dev/null
+++ b/tests/qemu-iotests/245.out
@@ -0,0 +1,5 @@
+..................
+----------------------------------------------------------------------
+Ran 18 tests
+
+OK
diff --git a/tests/qemu-iotests/246 b/tests/qemu-iotests/246
new file mode 100755
index 0000000000..b0997a392f
--- /dev/null
+++ b/tests/qemu-iotests/246
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+#
+# Test persistent bitmap resizing.
+#
+# Copyright (c) 2019 John Snow for Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# owner=jsnow@redhat.com
+
+import iotests
+from iotests import log
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+size = 64 * 1024 * 1024 * 1024
+gran_small = 32 * 1024
+gran_large = 128 * 1024
+
+def query_bitmaps(vm):
+ res = vm.qmp("query-block")
+ return { "bitmaps": { device['device']: device.get('dirty-bitmaps', []) for
+ device in res['return'] } }
+
+with iotests.FilePath('img') as img_path, \
+ iotests.VM() as vm:
+
+ log('--- Preparing image & VM ---\n')
+ iotests.qemu_img_create('-f', iotests.imgfmt, img_path, str(size))
+ vm.add_drive(img_path)
+
+
+ log('--- 1st Boot (Establish Baseline Image) ---\n')
+ vm.launch()
+
+ log('\n--- Adding bitmaps Small, Medium, Large, and Transient ---\n')
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="Small", granularity=gran_small, persistent=True)
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="Medium", persistent=True)
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="Large", granularity=gran_large, persistent=True)
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="Transient", persistent=False)
+
+ log('--- Forcing flush of bitmaps to disk ---\n')
+ log(query_bitmaps(vm), indent=2)
+ vm.shutdown()
+
+
+ log('--- 2nd Boot (Grow Image) ---\n')
+ vm.launch()
+ log(query_bitmaps(vm), indent=2)
+
+ log('--- Adding new bitmap, growing image, and adding 2nd new bitmap ---')
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="New", persistent=True)
+ vm.qmp_log("human-monitor-command",
+ command_line="block_resize drive0 70G")
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="Newtwo", persistent=True)
+ log(query_bitmaps(vm), indent=2)
+
+ log('--- Forcing flush of bitmaps to disk ---\n')
+ vm.shutdown()
+
+
+ log('--- 3rd Boot (Shrink Image) ---\n')
+ vm.launch()
+ log(query_bitmaps(vm), indent=2)
+
+ log('--- Adding "NewB" bitmap, removing "New" bitmap ---')
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="NewB", persistent=True)
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0",
+ name="New")
+
+ log('--- Truncating image ---\n')
+ vm.qmp_log("human-monitor-command",
+ command_line="block_resize drive0 50G")
+
+ log('--- Adding "NewC" bitmap, removing "NewTwo" bitmap ---')
+ vm.qmp_log("block-dirty-bitmap-add", node="drive0",
+ name="NewC", persistent=True)
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="Newtwo")
+
+ log('--- Forcing flush of bitmaps to disk ---\n')
+ vm.shutdown()
+
+
+ log('--- 4th Boot (Verification and Cleanup) ---\n')
+ vm.launch()
+ log(query_bitmaps(vm), indent=2)
+
+ log('--- Removing all Bitmaps ---\n')
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="Small")
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="Medium")
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="Large")
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="NewB")
+ vm.qmp_log("block-dirty-bitmap-remove", node="drive0", name="NewC")
+ log(query_bitmaps(vm), indent=2)
+
+ log('\n--- Done ---')
+ vm.shutdown()
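
Editor's note: query_bitmaps() above exists purely to turn the query-block reply into a stable, loggable summary keyed by device. A sketch of the transformation on a canned reply (the data below is abbreviated and illustrative, not real test output):

    res = {'return': [{'device': 'drive0',
                       'dirty-bitmaps': [{'name': 'Small',
                                          'granularity': 32768,
                                          'persistent': True}]},
                      {'device': 'drive1'}]}   # no bitmaps on this one
    summary = {'bitmaps': {dev['device']: dev.get('dirty-bitmaps', [])
                           for dev in res['return']}}
    # Drives without bitmaps collapse to [], which is why the final log
    # in this test prints {"drive0": []} once every bitmap is removed.
    assert summary['bitmaps']['drive1'] == []
    assert summary['bitmaps']['drive0'][0]['name'] == 'Small'
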
diff --git a/tests/qemu-iotests/246.out b/tests/qemu-iotests/246.out
new file mode 100644
index 0000000000..6671a11fdd
--- /dev/null
+++ b/tests/qemu-iotests/246.out
@@ -0,0 +1,295 @@
+--- Preparing image & VM ---
+
+--- 1st Boot (Establish Baseline Image) ---
+
+
+--- Adding bitmaps Small, Medium, Large, and Transient ---
+
+{"execute": "block-dirty-bitmap-add", "arguments": {"granularity": 32768, "name": "Small", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "Medium", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-add", "arguments": {"granularity": 131072, "name": "Large", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "Transient", "node": "drive0", "persistent": false}}
+{"return": {}}
+--- Forcing flush of bitmaps to disk ---
+
+{
+ "bitmaps": {
+ "drive0": [
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Transient",
+ "persistent": false,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 131072,
+ "name": "Large",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Medium",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 32768,
+ "name": "Small",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ }
+ ]
+ }
+}
+--- 2nd Boot (Grow Image) ---
+
+{
+ "bitmaps": {
+ "drive0": [
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 32768,
+ "name": "Small",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Medium",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 131072,
+ "name": "Large",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ }
+ ]
+ }
+}
+--- Adding new bitmap, growing image, and adding 2nd new bitmap ---
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "New", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "human-monitor-command", "arguments": {"command-line": "block_resize drive0 70G"}}
+{"return": ""}
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "Newtwo", "node": "drive0", "persistent": true}}
+{"return": {}}
+{
+ "bitmaps": {
+ "drive0": [
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Newtwo",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "New",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 32768,
+ "name": "Small",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Medium",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 131072,
+ "name": "Large",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ }
+ ]
+ }
+}
+--- Forcing flush of bitmaps to disk ---
+
+--- 3rd Boot (Shrink Image) ---
+
+{
+ "bitmaps": {
+ "drive0": [
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "New",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Newtwo",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 32768,
+ "name": "Small",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Medium",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 131072,
+ "name": "Large",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ }
+ ]
+ }
+}
+--- Adding "NewB" bitmap, removing "New" bitmap ---
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "NewB", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "New", "node": "drive0"}}
+{"return": {}}
+--- Truncating image ---
+
+{"execute": "human-monitor-command", "arguments": {"command-line": "block_resize drive0 50G"}}
+{"return": ""}
+--- Adding "NewC" bitmap, removing "NewTwo" bitmap ---
+{"execute": "block-dirty-bitmap-add", "arguments": {"name": "NewC", "node": "drive0", "persistent": true}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "Newtwo", "node": "drive0"}}
+{"return": {}}
+--- Forcing flush of bitmaps to disk ---
+
+--- 4th Boot (Verification and Cleanup) ---
+
+{
+ "bitmaps": {
+ "drive0": [
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "NewB",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "NewC",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 32768,
+ "name": "Small",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "name": "Medium",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 131072,
+ "name": "Large",
+ "persistent": true,
+ "recording": true,
+ "status": "active"
+ }
+ ]
+ }
+}
+--- Removing all Bitmaps ---
+
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "Small", "node": "drive0"}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "Medium", "node": "drive0"}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "Large", "node": "drive0"}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "NewB", "node": "drive0"}}
+{"return": {}}
+{"execute": "block-dirty-bitmap-remove", "arguments": {"name": "NewC", "node": "drive0"}}
+{"return": {}}
+{
+ "bitmaps": {
+ "drive0": []
+ }
+}
+
+--- Done ---
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 36100b803c..7289309604 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -243,3 +243,5 @@
242 rw auto quick
243 rw auto quick
244 rw auto quick
+245 rw auto
+246 rw auto quick
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index b02be0274e..b65365934b 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -751,7 +751,7 @@ static void *virtio_blk_test_setup(GString *cmd_line, void *arg)
char *tmp_path = drive_create();
g_string_append_printf(cmd_line,
- " -drive if=none,id=drive0,file=%s,format=raw "
+ " -drive if=none,id=drive0,file=%s,format=raw,auto-read-only=off "
"-drive if=none,id=drive1,file=null-co://,format=raw ",
tmp_path);
diff --git a/ui/Makefile.objs b/ui/Makefile.objs
index fe1a7aed97..cc2bf5b180 100644
--- a/ui/Makefile.objs
+++ b/ui/Makefile.objs
@@ -46,8 +46,8 @@ endif
common-obj-$(CONFIG_CURSES) += curses.mo
curses.mo-objs := curses.o
-curses.mo-cflags := $(CURSES_CFLAGS)
-curses.mo-libs := $(CURSES_LIBS)
+curses.mo-cflags := $(CURSES_CFLAGS) $(ICONV_CFLAGS)
+curses.mo-libs := $(CURSES_LIBS) $(ICONV_LIBS)
common-obj-$(call land,$(CONFIG_SPICE),$(CONFIG_GIO)) += spice-app.mo
spice-app.mo-objs := spice-app.o
diff --git a/ui/curses.c b/ui/curses.c
index 37954ce1b0..3a7e8649f3 100644
--- a/ui/curses.c
+++ b/ui/curses.c
@@ -27,6 +27,10 @@
#include <sys/ioctl.h>
#include <termios.h>
#endif
+#include <locale.h>
+#include <wchar.h>
+#include <langinfo.h>
+#include <iconv.h>
#include "qapi/error.h"
#include "qemu-common.h"
@@ -54,25 +58,30 @@ static WINDOW *screenpad = NULL;
static int width, height, gwidth, gheight, invalidate;
static int px, py, sminx, sminy, smaxx, smaxy;
-static chtype vga_to_curses[256];
+static const char *font_charset = "CP437";
+static cchar_t vga_to_curses[256];
static void curses_update(DisplayChangeListener *dcl,
int x, int y, int w, int h)
{
console_ch_t *line;
- chtype curses_line[width];
+ cchar_t curses_line[width];
line = screen + y * width;
for (h += y; y < h; y ++, line += width) {
for (x = 0; x < width; x++) {
chtype ch = line[x] & 0xff;
chtype at = line[x] & ~0xff;
- if (vga_to_curses[ch]) {
- ch = vga_to_curses[ch];
+ if (vga_to_curses[ch].chars[0]) {
+ curses_line[x] = vga_to_curses[ch];
+ } else {
+ curses_line[x].chars[0] = ch;
+ curses_line[x].chars[1] = 0;
+ curses_line[x].attr = 0;
}
- curses_line[x] = ch | at;
+ curses_line[x].attr |= at;
}
- mvwaddchnstr(screenpad, y, 0, curses_line, width);
+ mvwadd_wchnstr(screenpad, y, 0, curses_line, width);
}
pnoutrefresh(screenpad, py, px, sminy, sminx, smaxy - 1, smaxx - 1);
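
The hunk above is the core of the wide-character conversion: the narrow chtype, which packs the glyph byte and the attribute bits into one integer, is replaced by the ncursesw cchar_t, whose chars[] member holds a NUL-terminated wchar_t string alongside an attr field. A minimal standalone sketch of that API, assuming a UTF-8 locale; the glyph, attribute and coordinates are arbitrary:

    /* Minimal sketch of the ncursesw wide-cell API used above. */
    #define _XOPEN_SOURCE_EXTENDED      /* expose cchar_t and the *_wch calls */
    #include <locale.h>
    #include <wchar.h>
    #include <curses.h>

    int main(void)
    {
        cchar_t cell;
        wchar_t glyph[2] = { L'\x2592', L'\0' };    /* U+2592 medium shade */

        setlocale(LC_CTYPE, "");    /* needed before curses can emit UTF-8 */
        initscr();

        /* Pack wide character + attributes into one cell -- the manual
         * equivalent of what curses_update() now does per screen cell. */
        setcchar(&cell, glyph, A_BOLD, 0, NULL);
        mvadd_wch(2, 4, &cell);

        refresh();
        getch();
        endwin();
        return 0;
    }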
@@ -391,6 +400,254 @@ static void curses_atexit(void)
endwin();
}
+/* Set up the wchar glyph for one UCS-2 char */
+static void convert_ucs(int glyph, uint16_t ch, iconv_t conv)
+{
+ wchar_t wch;
+ char *pch, *pwch;
+ size_t sch, swch;
+
+ pch = (char *) &ch;
+ pwch = (char *) &wch;
+ sch = sizeof(ch);
+ swch = sizeof(wch);
+
+ if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) {
+ fprintf(stderr, "Could not convert 0x%04x from UCS-2 to WCHAR_T: %s\n",
+ ch, strerror(errno));
+ } else {
+ vga_to_curses[glyph].chars[0] = wch;
+ }
+}
+
+/* Set up the wchar glyph for one font character */
+static void convert_font(unsigned char ch, iconv_t conv)
+{
+ wchar_t wch;
+ char *pch, *pwch;
+ size_t sch, swch;
+
+ pch = (char *) &ch;
+ pwch = (char *) &wch;
+ sch = sizeof(ch);
+ swch = sizeof(wch);
+
+ if (iconv(conv, &pch, &sch, &pwch, &swch) == (size_t) -1) {
+ fprintf(stderr, "Could not convert 0x%02x from %s to WCHAR_T: %s\n",
+ ch, font_charset, strerror(errno));
+ } else {
+ vga_to_curses[ch].chars[0] = wch;
+ }
+}
+
+/* Convert one wchar to UCS-2 */
+static uint16_t get_ucs(wchar_t wch, iconv_t conv)
+{
+ uint16_t ch;
+ char *pch, *pwch;
+ size_t sch, swch;
+
+ pch = (char *) &ch;
+ pwch = (char *) &wch;
+ sch = sizeof(ch);
+ swch = sizeof(wch);
+
+ if (iconv(conv, &pwch, &swch, &pch, &sch) == (size_t) -1) {
+ fprintf(stderr, "Could not convert 0x%02x from WCHAR_T to UCS-2: %s\n",
+ wch, strerror(errno));
+ return 0xFFFD;
+ }
+
+ return ch;
+}
+
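
convert_ucs(), convert_font() and get_ucs() above are each a thin wrapper around a single iconv(3) call. For readers new to that API, a minimal round trip in the same style, assuming the host iconv supports the WCHAR_T and CP437 encodings (which the patch itself requires):

    /* Minimal sketch of the iconv(3) pattern used by convert_font():
     * one CP437 byte in, one wchar_t out. */
    #include <errno.h>
    #include <iconv.h>
    #include <stdio.h>
    #include <string.h>
    #include <wchar.h>

    int main(void)
    {
        iconv_t conv = iconv_open("WCHAR_T", "CP437");     /* to, from */
        unsigned char ch = 0xb0;                           /* CP437 light shade */
        wchar_t wch;
        char *in = (char *) &ch, *out = (char *) &wch;
        size_t in_left = sizeof(ch), out_left = sizeof(wch);

        if (conv == (iconv_t) -1) {
            perror("iconv_open");
            return 1;
        }

        /* iconv() advances both pointers and decrements both counters;
         * (size_t)-1 means failure, with the cause left in errno. */
        if (iconv(conv, &in, &in_left, &out, &out_left) == (size_t) -1) {
            fprintf(stderr, "conversion failed: %s\n", strerror(errno));
            return 1;
        }

        printf("0x%02x -> U+%04lX\n", ch, (unsigned long) wch);
        iconv_close(conv);
        return 0;
    }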
+/*
+ * Set up the mapping from VGA characters to curses line graphics.
+ */
+static void font_setup(void)
+{
+ /*
+ * Control characters are normally non-printable, but VGA does have
+ * well-known glyphs for them.
+ */
+ static uint16_t control_characters[0x20] = {
+ 0x0020,
+ 0x263a,
+ 0x263b,
+ 0x2665,
+ 0x2666,
+ 0x2663,
+ 0x2660,
+ 0x2022,
+ 0x25d8,
+ 0x25cb,
+ 0x25d9,
+ 0x2642,
+ 0x2640,
+ 0x266a,
+ 0x266b,
+ 0x263c,
+ 0x25ba,
+ 0x25c4,
+ 0x2195,
+ 0x203c,
+ 0x00b6,
+ 0x00a7,
+ 0x25ac,
+ 0x21a8,
+ 0x2191,
+ 0x2193,
+ 0x2192,
+ 0x2190,
+ 0x221f,
+ 0x2194,
+ 0x25b2,
+ 0x25bc
+ };
+
+ iconv_t ucs_to_wchar_conv;
+ iconv_t wchar_to_ucs_conv;
+ iconv_t font_conv;
+ int i;
+
+ ucs_to_wchar_conv = iconv_open("WCHAR_T", "UCS-2");
+ if (ucs_to_wchar_conv == (iconv_t) -1) {
+ fprintf(stderr, "Could not convert font glyphs from UCS-2: '%s'\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ wchar_to_ucs_conv = iconv_open("UCS-2", "WCHAR_T");
+ if (wchar_to_ucs_conv == (iconv_t) -1) {
+ fprintf(stderr, "Could not convert font glyphs to UCS-2: '%s'\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ font_conv = iconv_open("WCHAR_T", font_charset);
+ if (font_conv == (iconv_t) -1) {
+ fprintf(stderr, "Could not convert font glyphs from %s: '%s'\n",
+ font_charset, strerror(errno));
+ exit(1);
+ }
+
+ /* Control characters */
+ for (i = 0; i <= 0x1F; i++) {
+ convert_ucs(i, control_characters[i], ucs_to_wchar_conv);
+ }
+
+ for (i = 0x20; i <= 0xFF; i++) {
+ convert_font(i, font_conv);
+ }
+
+ /* DEL */
+ convert_ucs(0x7F, 0x2302, ucs_to_wchar_conv);
+
+ if (strcmp(nl_langinfo(CODESET), "UTF-8")) {
+ /* Locale is not Unicode-capable; use terminfo equivalents where available */
+ for (i = 0; i <= 0xFF; i++) {
+ switch (get_ucs(vga_to_curses[i].chars[0], wchar_to_ucs_conv)) {
+ case 0x00a3:
+ vga_to_curses[i] = *WACS_STERLING;
+ break;
+ case 0x2591:
+ vga_to_curses[i] = *WACS_BOARD;
+ break;
+ case 0x2592:
+ vga_to_curses[i] = *WACS_CKBOARD;
+ break;
+ case 0x2502:
+ vga_to_curses[i] = *WACS_VLINE;
+ break;
+ case 0x2524:
+ vga_to_curses[i] = *WACS_RTEE;
+ break;
+ case 0x2510:
+ vga_to_curses[i] = *WACS_URCORNER;
+ break;
+ case 0x2514:
+ vga_to_curses[i] = *WACS_LLCORNER;
+ break;
+ case 0x2534:
+ vga_to_curses[i] = *WACS_BTEE;
+ break;
+ case 0x252c:
+ vga_to_curses[i] = *WACS_TTEE;
+ break;
+ case 0x251c:
+ vga_to_curses[i] = *WACS_LTEE;
+ break;
+ case 0x2500:
+ vga_to_curses[i] = *WACS_HLINE;
+ break;
+ case 0x253c:
+ vga_to_curses[i] = *WACS_PLUS;
+ break;
+ case 0x256c:
+ vga_to_curses[i] = *WACS_LANTERN;
+ break;
+ case 0x256a:
+ vga_to_curses[i] = *WACS_NEQUAL;
+ break;
+ case 0x2518:
+ vga_to_curses[i] = *WACS_LRCORNER;
+ break;
+ case 0x250c:
+ vga_to_curses[i] = *WACS_ULCORNER;
+ break;
+ case 0x2588:
+ vga_to_curses[i] = *WACS_BLOCK;
+ break;
+ case 0x03c0:
+ vga_to_curses[i] = *WACS_PI;
+ break;
+ case 0x00b1:
+ vga_to_curses[i] = *WACS_PLMINUS;
+ break;
+ case 0x2265:
+ vga_to_curses[i] = *WACS_GEQUAL;
+ break;
+ case 0x2264:
+ vga_to_curses[i] = *WACS_LEQUAL;
+ break;
+ case 0x00b0:
+ vga_to_curses[i] = *WACS_DEGREE;
+ break;
+ case 0x25a0:
+ vga_to_curses[i] = *WACS_BULLET;
+ break;
+ case 0x2666:
+ vga_to_curses[i] = *WACS_DIAMOND;
+ break;
+ case 0x2192:
+ vga_to_curses[i] = *WACS_RARROW;
+ break;
+ case 0x2190:
+ vga_to_curses[i] = *WACS_LARROW;
+ break;
+ case 0x2191:
+ vga_to_curses[i] = *WACS_UARROW;
+ break;
+ case 0x2193:
+ vga_to_curses[i] = *WACS_DARROW;
+ break;
+ case 0x23ba:
+ vga_to_curses[i] = *WACS_S1;
+ break;
+ case 0x23bb:
+ vga_to_curses[i] = *WACS_S3;
+ break;
+ case 0x23bc:
+ vga_to_curses[i] = *WACS_S7;
+ break;
+ case 0x23bd:
+ vga_to_curses[i] = *WACS_S9;
+ break;
+ }
+ }
+ }
+}
+
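
font_setup() substitutes the WACS_* terminfo symbols only when the locale's codeset is not UTF-8; on a UTF-8 terminal the converted Unicode glyphs are emitted directly. The check it relies on is small enough to show in isolation (a sketch; without the setlocale() call, nl_langinfo() would report the "C" locale's codeset):

    /* Minimal sketch of the locale test that picks between the two paths. */
    #include <langinfo.h>
    #include <locale.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        setlocale(LC_CTYPE, "");    /* adopt the user's locale first */

        if (strcmp(nl_langinfo(CODESET), "UTF-8") == 0) {
            puts("UTF-8 terminal: emit the converted Unicode glyphs directly");
        } else {
            puts("legacy codeset: fall back to the WACS_* line-drawing set");
        }
        return 0;
    }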
static void curses_setup(void)
{
int i, colour_default[8] = {
@@ -420,47 +677,7 @@ static void curses_setup(void)
init_pair(i, COLOR_WHITE, COLOR_BLACK);
}
- /*
- * Setup mapping for vga to curses line graphics.
- * FIXME: for better font, have to use ncursesw and setlocale()
- */
-#if 0
- /* FIXME: map from where? */
- ACS_S1;
- ACS_S3;
- ACS_S7;
- ACS_S9;
-#endif
- /* ACS_* is not constant. So, we can't initialize statically. */
- vga_to_curses['\0'] = ' ';
- vga_to_curses[0x04] = ACS_DIAMOND;
- vga_to_curses[0x18] = ACS_UARROW;
- vga_to_curses[0x19] = ACS_DARROW;
- vga_to_curses[0x1a] = ACS_RARROW;
- vga_to_curses[0x1b] = ACS_LARROW;
- vga_to_curses[0x9c] = ACS_STERLING;
- vga_to_curses[0xb0] = ACS_BOARD;
- vga_to_curses[0xb1] = ACS_CKBOARD;
- vga_to_curses[0xb3] = ACS_VLINE;
- vga_to_curses[0xb4] = ACS_RTEE;
- vga_to_curses[0xbf] = ACS_URCORNER;
- vga_to_curses[0xc0] = ACS_LLCORNER;
- vga_to_curses[0xc1] = ACS_BTEE;
- vga_to_curses[0xc2] = ACS_TTEE;
- vga_to_curses[0xc3] = ACS_LTEE;
- vga_to_curses[0xc4] = ACS_HLINE;
- vga_to_curses[0xc5] = ACS_PLUS;
- vga_to_curses[0xce] = ACS_LANTERN;
- vga_to_curses[0xd8] = ACS_NEQUAL;
- vga_to_curses[0xd9] = ACS_LRCORNER;
- vga_to_curses[0xda] = ACS_ULCORNER;
- vga_to_curses[0xdb] = ACS_BLOCK;
- vga_to_curses[0xe3] = ACS_PI;
- vga_to_curses[0xf1] = ACS_PLMINUS;
- vga_to_curses[0xf2] = ACS_GEQUAL;
- vga_to_curses[0xf3] = ACS_LEQUAL;
- vga_to_curses[0xf8] = ACS_DEGREE;
- vga_to_curses[0xfe] = ACS_BULLET;
+ font_setup();
}
static void curses_keyboard_setup(void)
@@ -493,6 +710,10 @@ static void curses_display_init(DisplayState *ds, DisplayOptions *opts)
}
#endif
+ setlocale(LC_CTYPE, "");
+ if (opts->u.curses.charset) {
+ font_charset = opts->u.curses.charset;
+ }
curses_setup();
curses_keyboard_setup();
atexit(curses_atexit);
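
With the charset display option wired up in curses_display_init() above, the VGA font encoding becomes user-selectable at startup; for example, a guest whose firmware uses a Western European OEM font could be run with something like

    qemu-system-x86_64 -display curses,charset=CP850

(machine arguments omitted; CP850 is only an example, and the chosen encoding must be one the host iconv understands, otherwise font_setup() exits with an error).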
diff --git a/vl.c b/vl.c
index 027b853d92..c1d5484e12 100644
--- a/vl.c
+++ b/vl.c
@@ -3201,7 +3201,7 @@ int main(int argc, char **argv, char **envp)
#ifdef CONFIG_CURSES
dpy.type = DISPLAY_TYPE_CURSES;
#else
- error_report("curses support is disabled");
+ error_report("curses or iconv support is disabled");
exit(1);
#endif
break;