Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches: - Fix check_to_replace_node() - commit: Expose on-error option in QMP - qcow2: Fix qcow2_alloc_cluster_abort() for external data file - mirror: Fix deadlock - vvfat: Fix segfault while closing read-write node - Code cleanups # gpg: Signature made Tue 18 Feb 2020 14:04:43 GMT # gpg: using RSA key 7F09B272C88F2FD6 # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full] # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * remotes/kevin/tags/for-upstream: (36 commits) iotests: Check that @replaces can replace filters iotests: Add tests for invalid Quorum @replaces iotests: Use self.image_len in TestRepairQuorum iotests: Resolve TODOs in 041 iotests/041: Drop superfluous shutdowns iotests: Add VM.assert_block_path() iotests: Use complete_and_wait() in 155 quorum: Stop marking it as a filter mirror: Double-check immediately before replacing block: Remove bdrv_recurse_is_first_non_filter() block: Use bdrv_recurse_can_replace() quorum: Implement .bdrv_recurse_can_replace() blkverify: Implement .bdrv_recurse_can_replace() block: Add bdrv_recurse_can_replace() quorum: Fix child permissions iotests: Let 041 use -blockdev for quorum children block: Drop bdrv_is_first_non_filter() blockdev: Allow resizing everywhere blockdev: Allow external snapshots everywhere block/io_uring: Remove superfluous semicolon ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2020-02-18 14:23:43 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2020-02-18 14:23:43 +0000
commit: 672f9d0df10a68a5c5f2b32cbc8284abf9f5ee18 (patch)
tree: 2288bb7887cf822679bf7d538ba4c085fa8d798a
parent: 6c599282f8ab382fe59f03a6cae755b89561a7b3 (diff)
parent: c45a88f4429d7a8f384b75f3fd3fed5138a6edca (diff)
28 files changed, 675 insertions, 206 deletions
diff --git a/block.c b/block.c
index 9c810534d6..946e3c896e 100644
--- a/block.c
+++ b/block.c
@@ -2435,13 +2435,13 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
     if (bdrv_get_aio_context(child_bs) != ctx) {
         ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
         if (ret < 0 && child_role->can_set_aio_ctx) {
-            GSList *ignore = g_slist_prepend(NULL, child);;
+            GSList *ignore = g_slist_prepend(NULL, child);
             ctx = bdrv_get_aio_context(child_bs);
             if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
                 error_free(local_err);
                 ret = 0;
                 g_slist_free(ignore);
-                ignore = g_slist_prepend(NULL, child);;
+                ignore = g_slist_prepend(NULL, child);
                 child_role->set_aio_ctx(child, ctx, &ignore);
             }
             g_slist_free(ignore);
@@ -6201,65 +6201,55 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
     return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp);
 }
 
-/* This function will be called by the bdrv_recurse_is_first_non_filter method
- * of block filter and by bdrv_is_first_non_filter.
- * It is used to test if the given bs is the candidate or recurse more in the
- * node graph.
+/*
+ * This function checks whether the given @to_replace is allowed to be
+ * replaced by a node that always shows the same data as @bs.  This is
+ * used for example to verify whether the mirror job can replace
+ * @to_replace by the target mirrored from @bs.
+ * To be replaceable, @bs and @to_replace may either be guaranteed to
+ * always show the same data (because they are only connected through
+ * filters), or some driver may allow replacing one of its children
+ * because it can guarantee that this child's data is not visible at
+ * all (for example, for dissenting quorum children that have no other
+ * parents).
  */
-bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
-                                      BlockDriverState *candidate)
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+                              BlockDriverState *to_replace)
 {
-    /* return false if basic checks fails */
     if (!bs || !bs->drv) {
         return false;
     }
 
-    /* the code reached a non block filter driver -> check if the bs is
-     * the same as the candidate. It's the recursion termination condition.
-     */
-    if (!bs->drv->is_filter) {
-        return bs == candidate;
+    if (bs == to_replace) {
+        return true;
     }
-    /* Down this path the driver is a block filter driver */
 
-    /* If the block filter recursion method is defined use it to recurse down
-     * the node graph.
-     */
-    if (bs->drv->bdrv_recurse_is_first_non_filter) {
-        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
+    /* See what the driver can do */
+    if (bs->drv->bdrv_recurse_can_replace) {
+        return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
     }
 
-    /* the driver is a block filter but don't allow to recurse -> return false
-     */
-    return false;
-}
-
-/* This function checks if the candidate is the first non filter bs down it's
- * bs chain. Since we don't have pointers to parents it explore all bs chains
- * from the top. Some filters can choose not to pass down the recursion.
- */
-bool bdrv_is_first_non_filter(BlockDriverState *candidate)
-{
-    BlockDriverState *bs;
-    BdrvNextIterator it;
-
-    /* walk down the bs forest recursively */
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        bool perm;
-
-        /* try to recurse in this top level bs */
-        perm = bdrv_recurse_is_first_non_filter(bs, candidate);
-
-        /* candidate is the first non filter */
-        if (perm) {
-            bdrv_next_cleanup(&it);
-            return true;
-        }
+    /* For filters without an own implementation, we can recurse on our own */
+    if (bs->drv->is_filter) {
+        BdrvChild *child = bs->file ?: bs->backing;
+        return bdrv_recurse_can_replace(child->bs, to_replace);
     }
 
+    /* Safe default */
     return false;
 }
 
+/*
+ * Check whether the given @node_name can be replaced by a node that
+ * has the same data as @parent_bs.  If so, return @node_name's BDS;
+ * NULL otherwise.
+ *
+ * @node_name must be a (recursive) *child of @parent_bs (or this
+ * function will return NULL).
+ *
+ * The result (whether the node can be replaced or not) is only valid
+ * for as long as no graph or permission changes occur.
+ */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                                         const char *node_name, Error **errp)
 {
@@ -6284,8 +6274,11 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
      * Another benefit is that this tests exclude backing files which are
      * blocked by the backing blockers.
      */
-    if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
-        error_setg(errp, "Only top most non filter can be replaced");
+    if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
+        error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
+                   "because it cannot be guaranteed that doing so would not "
+                   "lead to an abrupt change of visible data",
+                   node_name, parent_bs->node_name);
         to_replace_bs = NULL;
         goto out;
     }
diff --git a/block/blkverify.c b/block/blkverify.c
index 304b0a1368..ba6b1853ae 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -268,18 +268,18 @@ static int blkverify_co_flush(BlockDriverState *bs)
     return bdrv_co_flush(s->test_file->bs);
 }
 
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                  BlockDriverState *candidate)
+static bool blkverify_recurse_can_replace(BlockDriverState *bs,
+                                          BlockDriverState *to_replace)
 {
     BDRVBlkverifyState *s = bs->opaque;
 
-    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
-    if (perm) {
-        return true;
-    }
-
-    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
+    /*
+     * blkverify quits the whole qemu process if there is a mismatch
+     * between bs->file->bs and s->test_file->bs.  Therefore, we know
+     * know that both must match bs and we can recurse down to either.
+     */
+    return bdrv_recurse_can_replace(bs->file->bs, to_replace) ||
+           bdrv_recurse_can_replace(s->test_file->bs, to_replace);
 }
 
 static void blkverify_refresh_filename(BlockDriverState *bs)
@@ -327,7 +327,7 @@ static BlockDriver bdrv_blkverify = {
     .bdrv_co_flush                    = blkverify_co_flush,
 
     .is_filter                        = true,
-    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
+    .bdrv_recurse_can_replace         = blkverify_recurse_can_replace,
 };
 
 static void bdrv_blkverify_init(void)
diff --git a/block/commit.c b/block/commit.c
index 23c90b3b91..8e672799af 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -43,27 +43,6 @@ typedef struct CommitBlockJob {
     char *backing_file_str;
 } CommitBlockJob;
 
-static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
-                                        int64_t offset, uint64_t bytes,
-                                        void *buf)
-{
-    int ret = 0;
-
-    assert(bytes < SIZE_MAX);
-
-    ret = blk_co_pread(bs, offset, bytes, buf, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = blk_co_pwrite(base, offset, bytes, buf, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
 static int commit_prepare(Job *job)
 {
     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
@@ -140,7 +119,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
     int ret = 0;
     int64_t n = 0; /* bytes */
     void *buf = NULL;
-    int bytes_written = 0;
     int64_t len, base_len;
 
     ret = len = blk_getlength(s->top);
@@ -165,6 +143,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 
     for (offset = 0; offset < len; offset += n) {
         bool copy;
+        bool error_in_source = true;
 
         /* Note that even when no rate limit is applied we need to yield
          * with no pending I/O here so that bdrv_drain_all() returns.
@@ -179,12 +158,20 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
         copy = (ret == 1);
         trace_commit_one_iteration(s, offset, n, ret);
         if (copy) {
-            ret = commit_populate(s->top, s->base, offset, n, buf);
-            bytes_written += n;
+            assert(n < SIZE_MAX);
+
+            ret = blk_co_pread(s->top, offset, n, buf, 0);
+            if (ret >= 0) {
+                ret = blk_co_pwrite(s->base, offset, n, buf, 0);
+                if (ret < 0) {
+                    error_in_source = false;
+                }
+            }
         }
         if (ret < 0) {
             BlockErrorAction action =
-                block_job_error_action(&s->common, false, s->on_error, -ret);
+                block_job_error_action(&s->common, s->on_error,
+                                       error_in_source, -ret);
             if (action == BLOCK_ERROR_ACTION_REPORT) {
                 goto out;
             } else {
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e95223d3cb..242d3ff055 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -118,13 +118,6 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 
-static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
-                                            BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
 static BlockDriver bdrv_copy_on_read = {
     .format_name                        = "copy-on-read",
 
@@ -143,8 +136,6 @@ static BlockDriver bdrv_copy_on_read = {
 
     .bdrv_co_block_status               = bdrv_co_block_status_from_file,
 
-    .bdrv_recurse_is_first_non_filter   = cor_recurse_is_first_non_filter,
-
     .has_variable_length                = true,
     .is_filter                          = true,
 };
diff --git a/block/filter-compress.c b/block/filter-compress.c
index 60137fb680..82c315b298 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -128,13 +128,6 @@ static void compress_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 
-static bool compress_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                 BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
 static BlockDriver bdrv_compress = {
     .format_name                        = "compress",
 
@@ -154,8 +147,6 @@ static BlockDriver bdrv_compress = {
 
     .bdrv_co_block_status               = bdrv_co_block_status_from_file,
 
-    .bdrv_recurse_is_first_non_filter   = compress_recurse_is_first_non_filter,
-
     .has_variable_length                = true,
     .is_filter                          = true,
 };
diff --git a/block/io_uring.c b/block/io_uring.c
index 56892fd1ab..a3142ca989 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -187,7 +187,7 @@ static void luring_process_completions(LuringState *s)
                     ret = 0;
                 }
             } else {
-                ret = -ENOSPC;;
+                ret = -ENOSPC;
             }
         }
 end:
diff --git a/block/mirror.c b/block/mirror.c
index f0f2d9dff1..447051dbc6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -103,6 +103,7 @@ struct MirrorOp {
     bool is_pseudo_op;
     bool is_active_write;
     CoQueue waiting_requests;
+    Coroutine *co;
 
     QTAILQ_ENTRY(MirrorOp) next;
 };
@@ -282,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
 }
 
 static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active)
 {
     MirrorOp *op;
 
     QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+        if (self == op) {
+            continue;
+        }
         /* Do not wait on pseudo ops, because it may in turn wait on
          * some other operation to start, which may in fact be the
          * caller of this function.  Since there is only one pseudo op
@@ -301,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
 }
 
 static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self)
 {
     /* Only non-active operations use up in-flight slots */
-    mirror_wait_for_any_operation(s, false);
+    mirror_wait_for_any_operation(s, self, false);
 }
 
 /* Perform a mirror copy operation.
@@ -347,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque)
 
     while (s->buf_free_count < nb_chunks) {
         trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, op);
     }
 
     /* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -429,6 +433,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
     default:
         abort();
     }
+    op->co = co;
 
     QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
     qemu_coroutine_enter(co);
@@ -553,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
         while (s->in_flight >= MAX_IN_FLIGHT) {
             trace_mirror_yield_in_flight(s, offset, s->in_flight);
-            mirror_wait_for_free_in_flight_slot(s);
+            mirror_wait_for_free_in_flight_slot(s, pseudo_op);
         }
 
         if (s->ret < 0) {
@@ -607,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s)
 static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
 {
     while (s->in_flight > 0) {
-        mirror_wait_for_free_in_flight_slot(s);
+        mirror_wait_for_free_in_flight_slot(s, NULL);
     }
 }
 
@@ -695,7 +700,19 @@ static int mirror_exit_common(Job *job)
          * drain potential other users of the BDS before changing the graph. */
         assert(s->in_drain);
         bdrv_drained_begin(target_bs);
-        bdrv_replace_node(to_replace, target_bs, &local_err);
+        /*
+         * Cannot use check_to_replace_node() here, because that would
+         * check for an op blocker on @to_replace, and we have our own
+         * there.
+         */
+        if (bdrv_recurse_can_replace(src, to_replace)) {
+            bdrv_replace_node(to_replace, target_bs, &local_err);
+        } else {
+            error_setg(&local_err, "Can no longer replace '%s' by '%s', "
+                       "because it can no longer be guaranteed that doing so "
+                       "would not lead to an abrupt change of visible data",
+                       to_replace->node_name, target_bs->node_name);
+        }
         bdrv_drained_end(target_bs);
         if (local_err) {
             error_report_err(local_err);
@@ -792,7 +809,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
             if (s->in_flight >= MAX_IN_FLIGHT) {
                 trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
                                    s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             }
 
@@ -945,7 +962,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
         /* Do not start passive operations while there are active
          * writes in progress */
         while (s->in_active_write_counter) {
-            mirror_wait_for_any_operation(s, true);
+            mirror_wait_for_any_operation(s, NULL, true);
         }
 
         if (s->ret < 0) {
@@ -971,7 +988,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
             if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
                 (cnt == 0 && s->in_flight > 0)) {
                 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
-                mirror_wait_for_free_in_flight_slot(s);
+                mirror_wait_for_free_in_flight_slot(s, NULL);
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index d41f5d049b..8cccc2c9f3 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -647,7 +647,6 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset,
     return bm_list;
 
 broken_dir:
-    ret = -EINVAL;
     error_setg(errp, "Broken bitmap directory");
 
 fail:
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1947f13a2d..78c95dfa16 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1026,8 +1026,11 @@ err:
 void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
-    qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits,
-                        QCOW2_DISCARD_NEVER);
+    if (!has_data_file(bs)) {
+        qcow2_free_clusters(bs, m->alloc_offset,
+                            m->nb_clusters << s->cluster_bits,
+                            QCOW2_DISCARD_NEVER);
+    }
 }
 
 /*
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c963bc8de1..7ef1c0e42a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -889,6 +889,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                                                 offset);
             if (table != NULL) {
                 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
+                old_table_index = -1;
                 qcow2_cache_discard(s->refcount_block_cache, table);
             }
 
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
index 8f5a0d1ebe..77bb578cdf 100644
--- a/block/qcow2-threads.c
+++ b/block/qcow2-threads.c
@@ -246,12 +246,15 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
         .len = len,
         .func = func,
     };
+    uint64_t sector_size;
 
-    assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE));
     assert(s->crypto);
 
+    sector_size = qcrypto_block_get_sector_size(s->crypto);
+    assert(QEMU_IS_ALIGNED(guest_offset, sector_size));
+    assert(QEMU_IS_ALIGNED(host_offset, sector_size));
+    assert(QEMU_IS_ALIGNED(len, sector_size));
+
     return len == 0 ? 0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
 }
 
@@ -270,7 +273,8 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
  *        will be written to the underlying storage device at
  *        @host_offset
  *
- * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple)
+ * @len - length of the buffer (must be a multiple of the encryption
+ *        sector size)
  *
  * Depending on the encryption method, @host_offset and/or @guest_offset
  * may be used for generating the initialization vector for
diff --git a/block/qcow2.c b/block/qcow2.c
index ef96606f8d..8dcee5efec 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2068,8 +2068,6 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs,
         goto fail;
     }
 
-    assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
-    assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
     if (qcow2_co_decrypt(bs,
                          file_cluster_offset + offset_into_cluster(s, offset),
                          offset, buf, bytes) < 0)
diff --git a/block/quorum.c b/block/quorum.c
index df68adcfaa..6d7a56bd93 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -796,17 +796,53 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
     return result;
 }
 
-static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
-                                               BlockDriverState *candidate)
+static bool quorum_recurse_can_replace(BlockDriverState *bs,
+                                       BlockDriverState *to_replace)
 {
     BDRVQuorumState *s = bs->opaque;
     int i;
 
     for (i = 0; i < s->num_children; i++) {
-        bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
-                                                     candidate);
-        if (perm) {
-            return true;
+        /*
+         * We have no idea whether our children show the same data as
+         * this node (@bs).  It is actually highly likely that
+         * @to_replace does not, because replacing a broken child is
+         * one of the main use cases here.
+         *
+         * We do know that the new BDS will match @bs, so replacing
+         * any of our children by it will be safe.  It cannot change
+         * the data this quorum node presents to its parents.
+         *
+         * However, replacing @to_replace by @bs in any of our
+         * children's chains may change visible data somewhere in
+         * there.  We therefore cannot recurse down those chains with
+         * bdrv_recurse_can_replace().
+         * (More formally, bdrv_recurse_can_replace() requires that
+         * @to_replace will be replaced by something matching the @bs
+         * passed to it.  We cannot guarantee that.)
+         *
+         * Thus, we can only check whether any of our immediate
+         * children matches @to_replace.
+         *
+         * (In the future, we might add a function to recurse down a
+         * chain that checks that nothing there cares about a change
+         * in data from the respective child in question.  For
+         * example, most filters do not care when their child's data
+         * suddenly changes, as long as their parents do not care.)
+         */
+        if (s->children[i]->bs == to_replace) {
+            /*
+             * We now have to ensure that there is no other parent
+             * that cares about replacing this child by a node with
+             * potentially different data.
+             * We do so by checking whether there are any other parents
+             * at all, which is stricter than necessary, but also very
+             * simple.  (We may decide to implement something more
+             * complex and permissive when there is an actual need for
+             * it.)
+             */
+            return QLIST_FIRST(&to_replace->parents) == s->children[i] &&
+                QLIST_NEXT(s->children[i], next_parent) == NULL;
         }
     }
 
@@ -1114,6 +1150,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error **errp)
     return NULL;
 }
 
+static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
+                              const BdrvChildRole *role,
+                              BlockReopenQueue *reopen_queue,
+                              uint64_t perm, uint64_t shared,
+                              uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+
+    /*
+     * We cannot share RESIZE or WRITE, as this would make the
+     * children differ from each other.
+     */
+    *nshared = (shared & (BLK_PERM_CONSISTENT_READ |
+                          BLK_PERM_WRITE_UNCHANGED))
+             | DEFAULT_PERM_UNCHANGED;
+}
+
 static const char *const quorum_strong_runtime_opts[] = {
     QUORUM_OPT_VOTE_THRESHOLD,
     QUORUM_OPT_BLKVERIFY,
@@ -1143,10 +1196,9 @@ static BlockDriver bdrv_quorum = {
     .bdrv_add_child                     = quorum_add_child,
     .bdrv_del_child                     = quorum_del_child,
 
-    .bdrv_child_perm                    = bdrv_filter_default_perms,
+    .bdrv_child_perm                    = quorum_child_perm,
 
-    .is_filter                          = true,
-    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
+    .bdrv_recurse_can_replace           = quorum_recurse_can_replace,
 
     .strong_runtime_opts                = quorum_strong_runtime_opts,
 };
diff --git a/block/replication.c b/block/replication.c
index 99532ce521..d6681b6c84 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -306,12 +306,6 @@ out:
     return ret;
 }
 
-static bool replication_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                    BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
 static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
 {
     Error *local_err = NULL;
@@ -699,7 +693,6 @@ static BlockDriver bdrv_replication = {
     .bdrv_co_writev             = replication_co_writev,
 
     .is_filter                  = true,
-    .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter,
 
     .has_variable_length        = true,
     .strong_runtime_opts        = replication_strong_runtime_opts,
diff --git a/block/throttle.c b/block/throttle.c
index 0349f42257..71f4bb0ad1 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -207,12 +207,6 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
     reopen_state->opaque = NULL;
 }
 
-static bool throttle_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                 BlockDriverState *candidate)
-{
-    return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
 static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
 {
     ThrottleGroupMember *tgm = bs->opaque;
@@ -252,8 +246,6 @@ static BlockDriver bdrv_throttle = {
     .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
     .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
 
-    .bdrv_recurse_is_first_non_filter   =   throttle_recurse_is_first_non_filter,
-
     .bdrv_attach_aio_context            =   throttle_attach_aio_context,
     .bdrv_detach_aio_context            =   throttle_detach_aio_context,
 
diff --git a/block/vvfat.c b/block/vvfat.c
index 019b8f1341..ab800c4887 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3124,17 +3124,10 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
     return ret;
 }
 
-static void write_target_close(BlockDriverState *bs) {
-    BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
-    bdrv_unref_child(s->bs, s->qcow);
-    g_free(s->qcow_filename);
-}
-
 static BlockDriver vvfat_write_target = {
     .format_name        = "vvfat_write_target",
     .instance_size      = sizeof(void*),
     .bdrv_co_pwritev    = write_target_commit,
-    .bdrv_close         = write_target_close,
 };
 
 static void vvfat_qcow_options(int *child_flags, QDict *child_options,
diff --git a/blockdev.c b/blockdev.c
index c6a727cca9..45de0ba37e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1592,11 +1592,6 @@ static void external_snapshot_prepare(BlkActionState *common,
         }
     }
 
-    if (!bdrv_is_first_non_filter(state->old_bs)) {
-        error_setg(errp, QERR_FEATURE_DISABLED, "snapshot");
-        goto out;
-    }
-
     if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
         BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
         const char *format = s->has_format ? s->format : "qcow2";
@@ -3336,11 +3331,6 @@ void qmp_block_resize(bool has_device, const char *device,
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 
-    if (!bdrv_is_first_non_filter(bs)) {
-        error_setg(errp, QERR_FEATURE_DISABLED, "resize");
-        goto out;
-    }
-
     if (size < 0) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
         goto out;
@@ -3471,6 +3461,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
                       bool has_top, const char *top,
                       bool has_backing_file, const char *backing_file,
                       bool has_speed, int64_t speed,
+                      bool has_on_error, BlockdevOnError on_error,
                       bool has_filter_node_name, const char *filter_node_name,
                       bool has_auto_finalize, bool auto_finalize,
                       bool has_auto_dismiss, bool auto_dismiss,
@@ -3481,15 +3472,14 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
     BlockDriverState *base_bs, *top_bs;
     AioContext *aio_context;
     Error *local_err = NULL;
-    /* This will be part of the QMP command, if/when the
-     * BlockdevOnError change for blkmirror makes it in
-     */
-    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
     int job_flags = JOB_DEFAULT;
 
     if (!has_speed) {
         speed = 0;
     }
+    if (!has_on_error) {
+        on_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
     if (!has_filter_node_name) {
         filter_node_name = NULL;
     }
diff --git a/include/block/block.h b/include/block/block.h
index 6cd566324d..314ce63ed9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -391,11 +391,6 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
                        BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
                        Error **errp);
 
-/* external snapshots */
-bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
-                                      BlockDriverState *candidate);
-bool bdrv_is_first_non_filter(BlockDriverState *candidate);
-
 /* check if a named node can be replaced when doing drive-mirror */
 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                                         const char *node_name, Error **errp);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 640fb82c78..6f9fd5e20e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -94,14 +94,13 @@ struct BlockDriver {
      * must implement them and return -ENOTSUP.
      */
     bool is_filter;
-    /* for snapshots block filter like Quorum can implement the
-     * following recursive callback.
-     * It's purpose is to recurse on the filter children while calling
-     * bdrv_recurse_is_first_non_filter on them.
-     * For a sample implementation look in the future Quorum block filter.
+    /*
+     * Return true if @to_replace can be replaced by a BDS with the
+     * same data as @bs without it affecting @bs's behavior (that is,
+     * without it being visible to @bs's parents).
      */
-    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
-                                             BlockDriverState *candidate);
+    bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+                                     BlockDriverState *to_replace);
 
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
@@ -1263,6 +1262,9 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
                                uint64_t perm, uint64_t shared,
                                uint64_t *nperm, uint64_t *nshared);
 
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+                              BlockDriverState *to_replace);
+
 /*
  * Default implementation for drivers to pass bdrv_co_block_status() to
  * their file.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 13dad62f44..37d7ea7295 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1164,7 +1164,10 @@
 #          for jobs, cancel the job
 #
 # @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
-#          or BLOCK_JOB_ERROR)
+#          or BLOCK_JOB_ERROR). The backup, mirror and commit block jobs retry
+#          the failing request later and may still complete successfully. The
+#          stream block job continues to stream and will complete with an
+#          error.
 #
 # @enospc: same as @stop on ENOSPC, same as @report otherwise.
 #
@@ -1655,6 +1658,9 @@
 #
 # @speed: the maximum speed, in bytes per second
 #
+# @on-error: the action to take on an error. 'ignore' means that the request
+#            should be retried. (default: report; Since: 5.0)
+#
 # @filter-node-name: the node name that should be assigned to the
 #                    filter driver that the commit job inserts into the graph
 #                    above @top. If this option is not given, a node name is
@@ -1691,6 +1697,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str',
             '*base': 'str', '*top-node': 'str', '*top': 'str',
             '*backing-file': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError',
             '*filter-node-name': 'str',
             '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } }
 
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 2e7ee0e84f..32c82b4ec6 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -430,6 +430,289 @@ class TestReopenOverlay(ImageCommitTestCase):
     def test_reopen_overlay(self):
         self.run_commit_test(self.img1, self.img0)
 
+class TestErrorHandling(iotests.QMPTestCase):
+    image_len = 2 * 1024 * 1024
+
+    def setUp(self):
+        iotests.create_image(backing_img, self.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+
+        qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x11 0 512k', mid_img)
+        qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x22 0 512k', test_img)
+
+        self.vm = iotests.VM()
+        self.vm.launch()
+
+        self.blkdebug_file = iotests.file_path("blkdebug.conf")
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def blockdev_add(self, **kwargs):
+        result = self.vm.qmp('blockdev-add', **kwargs)
+        self.assert_qmp(result, 'return', {})
+
+    def add_block_nodes(self, base_debug=None, mid_debug=None, top_debug=None):
+        self.blockdev_add(node_name='base-file', driver='file',
+                          filename=backing_img)
+        self.blockdev_add(node_name='mid-file', driver='file',
+                          filename=mid_img)
+        self.blockdev_add(node_name='top-file', driver='file',
+                          filename=test_img)
+
+        if base_debug:
+            self.blockdev_add(node_name='base-dbg', driver='blkdebug',
+                              image='base-file', inject_error=base_debug)
+        if mid_debug:
+            self.blockdev_add(node_name='mid-dbg', driver='blkdebug',
+                              image='mid-file', inject_error=mid_debug)
+        if top_debug:
+            self.blockdev_add(node_name='top-dbg', driver='blkdebug',
+                              image='top-file', inject_error=top_debug)
+
+        self.blockdev_add(node_name='base-fmt', driver='raw',
+                          file=('base-dbg' if base_debug else 'base-file'))
+        self.blockdev_add(node_name='mid-fmt', driver=iotests.imgfmt,
+                          file=('mid-dbg' if mid_debug else 'mid-file'),
+                          backing='base-fmt')
+        self.blockdev_add(node_name='top-fmt', driver=iotests.imgfmt,
+                          file=('top-dbg' if top_debug else 'top-file'),
+                          backing='mid-fmt')
+
+    def run_job(self, expected_events, error_pauses_job=False):
+        match_device = {'data': {'device': 'job0'}}
+        events = [
+            ('BLOCK_JOB_COMPLETED', match_device),
+            ('BLOCK_JOB_CANCELLED', match_device),
+            ('BLOCK_JOB_ERROR', match_device),
+            ('BLOCK_JOB_READY', match_device),
+        ]
+
+        completed = False
+        log = []
+        while not completed:
+            ev = self.vm.events_wait(events, timeout=5.0)
+            if ev['event'] == 'BLOCK_JOB_COMPLETED':
+                completed = True
+            elif ev['event'] == 'BLOCK_JOB_ERROR':
+                if error_pauses_job:
+                    result = self.vm.qmp('block-job-resume', device='job0')
+                    self.assert_qmp(result, 'return', {})
+            elif ev['event'] == 'BLOCK_JOB_READY':
+                result = self.vm.qmp('block-job-complete', device='job0')
+                self.assert_qmp(result, 'return', {})
+            else:
+                self.fail("Unexpected event: %s" % ev)
+            log.append(iotests.filter_qmp_event(ev))
+
+        self.maxDiff = None
+        self.assertEqual(expected_events, log)
+
+    def event_error(self, op, action):
+        return {
+            'event': 'BLOCK_JOB_ERROR',
+            'data': {'action': action, 'device': 'job0', 'operation': op},
+            'timestamp': {'microseconds': 'USECS', 'seconds': 'SECS'}
+        }
+
+    def event_ready(self):
+        return {
+            'event': 'BLOCK_JOB_READY',
+            'data': {'device': 'job0',
+                     'len': 524288,
+                     'offset': 524288,
+                     'speed': 0,
+                     'type': 'commit'},
+            'timestamp': {'microseconds': 'USECS', 'seconds': 'SECS'},
+        }
+
+    def event_completed(self, errmsg=None, active=True):
+        max_len = 524288 if active else self.image_len
+        data = {
+            'device': 'job0',
+            'len': max_len,
+            'offset': 0 if errmsg else max_len,
+            'speed': 0,
+            'type': 'commit'
+        }
+        if errmsg:
+            data['error'] = errmsg
+
+        return {
+            'event': 'BLOCK_JOB_COMPLETED',
+            'data': data,
+            'timestamp': {'microseconds': 'USECS', 'seconds': 'SECS'},
+        }
+
+    def blkdebug_event(self, event, is_raw=False):
+        if event:
+            return [{
+                'event': event,
+                'sector': 512 if is_raw else 1024,
+                'once': True,
+            }]
+        return None
+
+    def prepare_and_start_job(self, on_error, active=True,
+                              top_event=None, mid_event=None, base_event=None):
+
+        top_debug = self.blkdebug_event(top_event)
+        mid_debug = self.blkdebug_event(mid_event)
+        base_debug = self.blkdebug_event(base_event, True)
+
+        self.add_block_nodes(top_debug=top_debug, mid_debug=mid_debug,
+                             base_debug=base_debug)
+
+        result = self.vm.qmp('block-commit', job_id='job0', device='top-fmt',
+                             top_node='top-fmt' if active else 'mid-fmt',
+                             base_node='mid-fmt' if active else 'base-fmt',
+                             on_error=on_error)
+        self.assert_qmp(result, 'return', {})
+
+    def testActiveReadErrorReport(self):
+        self.prepare_and_start_job('report', top_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'report'),
+            self.event_completed('Input/output error')
+        ])
+
+        self.vm.shutdown()
+        self.assertFalse(iotests.compare_images(test_img, mid_img),
+                         'target image matches source after error')
+
+    def testActiveReadErrorStop(self):
+        self.prepare_and_start_job('stop', top_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'stop'),
+            self.event_ready(),
+            self.event_completed()
+        ], error_pauses_job=True)
+
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(test_img, mid_img),
+                        'target image does not match source after commit')
+
+    def testActiveReadErrorIgnore(self):
+        self.prepare_and_start_job('ignore', top_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'ignore'),
+            self.event_ready(),
+            self.event_completed()
+        ])
+
+        # For commit, 'ignore' actually means retry, so this will succeed
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(test_img, mid_img),
+                        'target image does not match source after commit')
+
+    def testActiveWriteErrorReport(self):
+        self.prepare_and_start_job('report', mid_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'report'),
+            self.event_completed('Input/output error')
+        ])
+
+        self.vm.shutdown()
+        self.assertFalse(iotests.compare_images(test_img, mid_img),
+                         'target image matches source after error')
+
+    def testActiveWriteErrorStop(self):
+        self.prepare_and_start_job('stop', mid_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'stop'),
+            self.event_ready(),
+            self.event_completed()
+        ], error_pauses_job=True)
+
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(test_img, mid_img),
+                        'target image does not match source after commit')
+
+    def testActiveWriteErrorIgnore(self):
+        self.prepare_and_start_job('ignore', mid_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'ignore'),
+            self.event_ready(),
+            self.event_completed()
+        ])
+
+        # For commit, 'ignore' actually means retry, so this will succeed
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(test_img, mid_img),
+                        'target image does not match source after commit')
+
+    def testIntermediateReadErrorReport(self):
+        self.prepare_and_start_job('report', active=False, mid_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'report'),
+            self.event_completed('Input/output error', active=False)
+        ])
+
+        self.vm.shutdown()
+        self.assertFalse(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                         'target image matches source after error')
+
+    def testIntermediateReadErrorStop(self):
+        self.prepare_and_start_job('stop', active=False, mid_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'stop'),
+            self.event_completed(active=False)
+        ], error_pauses_job=True)
+
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                        'target image does not match source after commit')
+
+    def testIntermediateReadErrorIgnore(self):
+        self.prepare_and_start_job('ignore', active=False, mid_event='read_aio')
+        self.run_job([
+            self.event_error('read', 'ignore'),
+            self.event_completed(active=False)
+        ])
+
+        # For commit, 'ignore' actually means retry, so this will succeed
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                        'target image does not match source after commit')
+
+    def testIntermediateWriteErrorReport(self):
+        self.prepare_and_start_job('report', active=False, base_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'report'),
+            self.event_completed('Input/output error', active=False)
+        ])
+
+        self.vm.shutdown()
+        self.assertFalse(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                         'target image matches source after error')
+
+    def testIntermediateWriteErrorStop(self):
+        self.prepare_and_start_job('stop', active=False, base_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'stop'),
+            self.event_completed(active=False)
+        ], error_pauses_job=True)
+
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                        'target image does not match source after commit')
+
+    def testIntermediateWriteErrorIgnore(self):
+        self.prepare_and_start_job('ignore', active=False, base_event='write_aio')
+        self.run_job([
+            self.event_error('write', 'ignore'),
+            self.event_completed(active=False)
+        ])
+
+        # For commit, 'ignore' actually means retry, so this will succeed
+        self.vm.shutdown()
+        self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
+                        'target image does not match source after commit')
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'],
                  supported_protocols=['file'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index 220a5fa82c..6a917130b6 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-...............................................
+...........................................................
 ----------------------------------------------------------------------
-Ran 47 tests
+Ran 59 tests
 
 OK
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index 43556b9727..5d67bf14bf 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -20,6 +20,7 @@
 
 import time
 import os
+import re
 import iotests
 from iotests import qemu_img, qemu_io
 
@@ -34,6 +35,8 @@ quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img')
 quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img')
 quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img')
 
+nbd_sock_path = os.path.join(iotests.test_dir, 'nbd.sock')
+
 class TestSingleDrive(iotests.QMPTestCase):
     image_len = 1 * 1024 * 1024 # MB
     qmp_cmd = 'drive-mirror'
@@ -80,7 +83,6 @@ class TestSingleDrive(iotests.QMPTestCase):
         self.cancel_and_wait(force=True)
         result = self.vm.qmp('query-block')
         self.assert_qmp(result, 'return[0]/inserted/file', test_img)
-        self.vm.shutdown()
 
     def test_cancel_after_ready(self):
         self.assert_no_active_block_jobs()
@@ -201,8 +203,6 @@ class TestSingleDrive(iotests.QMPTestCase):
         self.assert_qmp(result, 'return[0]/node-name', 'top')
         self.assert_qmp(result, 'return[0]/backing/node-name', 'base')
 
-        self.vm.shutdown()
-
     def test_medium_not_found(self):
         if iotests.qemu_default_machine != 'pc':
             return
@@ -455,7 +455,6 @@ new_state = "1"
                     self.assert_qmp(event, 'data/id', 'drive0')
 
         self.assert_no_active_block_jobs()
-        self.vm.shutdown()
 
     def test_ignore_read(self):
         self.assert_no_active_block_jobs()
@@ -475,7 +474,6 @@ new_state = "1"
         result = self.vm.qmp('query-block-jobs')
         self.assert_qmp(result, 'return[0]/paused', False)
         self.complete_and_wait()
-        self.vm.shutdown()
 
     def test_large_cluster(self):
         self.assert_no_active_block_jobs()
@@ -540,7 +538,6 @@ new_state = "1"
 
         self.complete_and_wait(wait_ready=False)
         self.assert_no_active_block_jobs()
-        self.vm.shutdown()
 
 class TestWriteErrors(iotests.QMPTestCase):
     image_len = 2 * 1024 * 1024 # MB
@@ -614,7 +611,6 @@ new_state = "1"
                     completed = True
 
         self.assert_no_active_block_jobs()
-        self.vm.shutdown()
 
     def test_ignore_write(self):
         self.assert_no_active_block_jobs()
@@ -631,7 +627,6 @@ new_state = "1"
         result = self.vm.qmp('query-block-jobs')
         self.assert_qmp(result, 'return[0]/paused', False)
         self.complete_and_wait()
-        self.vm.shutdown()
 
     def test_stop_write(self):
         self.assert_no_active_block_jobs()
@@ -667,7 +662,6 @@ new_state = "1"
 
         self.complete_and_wait(wait_ready=False)
         self.assert_no_active_block_jobs()
-        self.vm.shutdown()
 
 class TestSetSpeed(iotests.QMPTestCase):
     image_len = 80 * 1024 * 1024 # MB
@@ -881,11 +875,14 @@ class TestRepairQuorum(iotests.QMPTestCase):
         # Add each individual quorum images
         for i in self.IMAGES:
             qemu_img('create', '-f', iotests.imgfmt, i,
-                     str(TestSingleDrive.image_len))
+                     str(self.image_len))
             # Assign a node name to each quorum image in order to manipulate
             # them
             opts = "node-name=img%i" % self.IMAGES.index(i)
-            self.vm = self.vm.add_drive(i, opts)
+            opts += ',driver=%s' % iotests.imgfmt
+            opts += ',file.driver=file'
+            opts += ',file.filename=%s' % i
+            self.vm = self.vm.add_blockdev(opts)
 
         self.vm.launch()
 
@@ -898,7 +895,8 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
     def tearDown(self):
         self.vm.shutdown()
-        for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file ]:
+        for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file,
+                                 nbd_sock_path ]:
             # Do a try/except because the test may have deleted some images
             try:
                 os.remove(i)
@@ -915,8 +913,7 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
         self.complete_and_wait(drive="job0")
         self.assert_has_block_node("repair0", quorum_repair_img)
-        # TODO: a better test requiring some QEMU infrastructure will be added
-        #       to check that this file is really driven by quorum
+        self.vm.assert_block_path('quorum0', '/children.1', 'repair0')
         self.vm.shutdown()
         self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img),
                         'target image does not match source after mirroring')
@@ -933,7 +930,6 @@ class TestRepairQuorum(iotests.QMPTestCase):
         # here we check that the last registered quorum file has not been
         # swapped out and unref
         self.assert_has_block_node(None, quorum_img3)
-        self.vm.shutdown()
 
     def test_cancel_after_ready(self):
         self.assert_no_active_block_jobs()
@@ -1038,9 +1034,71 @@ class TestRepairQuorum(iotests.QMPTestCase):
 
         self.complete_and_wait('job0')
         self.assert_has_block_node("repair0", quorum_repair_img)
-        # TODO: a better test requiring some QEMU infrastructure will be added
-        #       to check that this file is really driven by quorum
+        self.vm.assert_block_path('quorum0', '/children.1', 'repair0')
+
+    def test_with_other_parent(self):
+        """
+        Check that we cannot replace a Quorum child when it has other
+        parents.
+        """
+        result = self.vm.qmp('nbd-server-start',
+                             addr={
+                                 'type': 'unix',
+                                 'data': {'path': nbd_sock_path}
+                             })
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('nbd-server-add', device='img1')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0',
+                             sync='full', node_name='repair0', replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'error/desc',
+                        "Cannot replace 'img1' by a node mirrored from "
+                        "'quorum0', because it cannot be guaranteed that doing "
+                        "so would not lead to an abrupt change of visible data")
+
+    def test_with_other_parents_after_mirror_start(self):
+        """
+        The same as test_with_other_parent(), but add the NBD server
+        only when the mirror job is already running.
+        """
+        result = self.vm.qmp('nbd-server-start',
+                             addr={
+                                 'type': 'unix',
+                                 'data': {'path': nbd_sock_path}
+                             })
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0',
+                             sync='full', node_name='repair0', replaces='img1',
+                             target=quorum_repair_img, format=iotests.imgfmt)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('nbd-server-add', device='img1')
+        self.assert_qmp(result, 'return', {})
+
+        # The full error message goes to stderr, we will check it later
+        self.complete_and_wait('mirror',
+                               completion_error='Operation not permitted')
+
+        # Should not have been replaced
+        self.vm.assert_block_path('quorum0', '/children.1', 'img1')
+
+        # Check the full error message now
         self.vm.shutdown()
+        log = self.vm.get_log()
+        log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+        log = re.sub(r'^Formatting.*\n', '', log)
+        log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+        log = re.sub(r'^%s: ' % os.path.basename(iotests.qemu_prog), '', log)
+
+        self.assertEqual(log,
+                         "Can no longer replace 'img1' by 'repair0', because " +
+                         "it can no longer be guaranteed that doing so would " +
+                         "not lead to an abrupt change of visible data")
+
 
 # Test mirroring with a source that does not have any parents (not even a
 # BlockBackend)
@@ -1132,6 +1190,52 @@ class TestOrphanedSource(iotests.QMPTestCase):
         self.assertFalse('mirror-filter' in nodes,
                          'Mirror filter node did not disappear')
 
+# Test cases for @replaces that do not necessarily involve Quorum
+class TestReplaces(iotests.QMPTestCase):
+    # Each of these test cases needs their own block graph, so do not
+    # create any nodes here
+    def setUp(self):
+        self.vm = iotests.VM()
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for img in (test_img, target_img):
+            try:
+                os.remove(img)
+            except OSError:
+                pass
+
+    @iotests.skip_if_unsupported(['copy-on-read'])
+    def test_replace_filter(self):
+        """
+        Check that we can replace filter nodes.
+        """
+        result = self.vm.qmp('blockdev-add', **{
+                                 'driver': 'copy-on-read',
+                                 'node-name': 'filter0',
+                                 'file': {
+                                     'driver': 'copy-on-read',
+                                     'node-name': 'filter1',
+                                     'file': {
+                                         'driver': 'null-co'
+                                     }
+                                 }
+                             })
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('blockdev-add',
+                             node_name='target', driver='null-co')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('blockdev-mirror', job_id='mirror', device='filter0',
+                             target='target', sync='full', replaces='filter1')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait('mirror')
+
+        self.vm.assert_block_path('filter0', '/file', 'target')
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2', 'qed'],
                  supported_protocols=['file'],
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index f496be9197..877b76fd31 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-...........................................................................................
+..............................................................................................
 ----------------------------------------------------------------------
-Ran 91 tests
+Ran 94 tests
 
 OK
diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155
index e35b1d534b..f237868710 100755
--- a/tests/qemu-iotests/155
+++ b/tests/qemu-iotests/155
@@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass):
 
         self.assert_qmp(result, 'return', {})
 
-        self.vm.event_wait('BLOCK_JOB_READY')
-
-        result = self.vm.qmp('block-job-complete', device='mirror-job')
-        self.assert_qmp(result, 'return', {})
-
-        self.vm.event_wait('BLOCK_JOB_COMPLETED')
+        self.complete_and_wait('mirror-job')
 
     def testFull(self):
         self.runMirror('full')
diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244
index 0d1efee6ef..2ec1815e6f 100755
--- a/tests/qemu-iotests/244
+++ b/tests/qemu-iotests/244
@@ -197,6 +197,20 @@ $QEMU_IO -c 'read -P 0x11 0 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io
 $QEMU_IMG map --output=human "$TEST_IMG" | _filter_testdir
 $QEMU_IMG map --output=json "$TEST_IMG"
 
+echo
+echo "=== Copy offloading ==="
+echo
+
+# Make use of copy offloading if the test host can provide it
+_make_test_img -o "data_file=$TEST_IMG.data" 64M
+$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG"
+$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG"
+
+# blkdebug doesn't support copy offloading, so this tests the error path
+$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG"
+$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG"
+$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG"
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out
index 6a3d0067cc..e6f4dc7993 100644
--- a/tests/qemu-iotests/244.out
+++ b/tests/qemu-iotests/244.out
@@ -122,4 +122,10 @@ Offset          Length          Mapped to       File
 0               0x100000        0               TEST_DIR/t.qcow2.data
 [{ "start": 0, "length": 1048576, "depth": 0, "zero": false, "data": true, "offset": 0},
 { "start": 1048576, "length": 66060288, "depth": 0, "zero": true, "data": false}]
+
+=== Copy offloading ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data
+Images are identical.
+Images are identical.
 *** done
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 0473e824ed..8815052eb5 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -714,6 +714,65 @@ class VM(qtest.QEMUQtestMachine):
 
         return fields.items() <= ret.items()
 
+    def assert_block_path(self, root, path, expected_node, graph=None):
+        """
+        Check whether the node under the given path in the block graph
+        is @expected_node.
+
+        @root is the node name of the node where the @path is rooted.
+
+        @path is a string that consists of child names separated by
+        slashes.  It must begin with a slash.
+
+        Examples for @root + @path:
+          - root="qcow2-node", path="/backing/file"
+          - root="quorum-node", path="/children.2/file"
+
+        Hypothetically, @path could be empty, in which case it would
+        point to @root.  However, in practice this case is not useful
+        and hence not allowed.
+
+        @expected_node may be None.  (All elements of the path but the
+        leaf must still exist.)
+
+        @graph may be None or the result of an x-debug-query-block-graph
+        call that has already been performed.
+        """
+        if graph is None:
+            graph = self.qmp('x-debug-query-block-graph')['return']
+
+        iter_path = iter(path.split('/'))
+
+        # Must start with a /
+        assert next(iter_path) == ''
+
+        node = next((node for node in graph['nodes'] if node['name'] == root),
+                    None)
+
+        # An empty @path is not allowed, so the root node must be present
+        assert node is not None, 'Root node %s not found' % root
+
+        for child_name in iter_path:
+            assert node is not None, 'Cannot follow path %s%s' % (root, path)
+
+            try:
+                node_id = next(edge['child'] for edge in graph['edges'] \
+                                             if edge['parent'] == node['id'] and
+                                                edge['name'] == child_name)
+
+                node = next(node for node in graph['nodes'] \
+                                 if node['id'] == node_id)
+            except StopIteration:
+                node = None
+
+        if node is None:
+            assert expected_node is None, \
+                   'No node found under %s (but expected %s)' % \
+                   (path, expected_node)
+        else:
+            assert node['name'] == expected_node, \
+                   'Found node %s under %s (but expected %s)' % \
+                   (node['name'], path, expected_node)
 
 index_re = re.compile(r'([^\[]+)\[([^\]]+)\]')
author	Peter Maydell <peter.maydell@linaro.org>	2020-02-18 14:23:43 +0000
committer	Peter Maydell <peter.maydell@linaro.org>	2020-02-18 14:23:43 +0000
commit	672f9d0df10a68a5c5f2b32cbc8284abf9f5ee18 (patch)
tree	2288bb7887cf822679bf7d538ba4c085fa8d798a
parent	6c599282f8ab382fe59f03a6cae755b89561a7b3 (diff)
parent	c45a88f4429d7a8f384b75f3fd3fed5138a6edca (diff)