diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blkverify.c | 20 | ||||
-rw-r--r-- | block/commit.c | 37 | ||||
-rw-r--r-- | block/copy-on-read.c | 9 | ||||
-rw-r--r-- | block/filter-compress.c | 9 | ||||
-rw-r--r-- | block/io_uring.c | 2 | ||||
-rw-r--r-- | block/mirror.c | 37 | ||||
-rw-r--r-- | block/qcow2-bitmap.c | 1 | ||||
-rw-r--r-- | block/qcow2-cluster.c | 7 | ||||
-rw-r--r-- | block/qcow2-refcount.c | 1 | ||||
-rw-r--r-- | block/qcow2-threads.c | 12 | ||||
-rw-r--r-- | block/qcow2.c | 2 | ||||
-rw-r--r-- | block/quorum.c | 70 | ||||
-rw-r--r-- | block/replication.c | 7 | ||||
-rw-r--r-- | block/throttle.c | 8 | ||||
-rw-r--r-- | block/vvfat.c | 7 |
15 files changed, 125 insertions, 104 deletions
diff --git a/block/blkverify.c b/block/blkverify.c index 304b0a1368..ba6b1853ae 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -268,18 +268,18 @@ static int blkverify_co_flush(BlockDriverState *bs) return bdrv_co_flush(s->test_file->bs); } -static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) +static bool blkverify_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) { BDRVBlkverifyState *s = bs->opaque; - bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); - - if (perm) { - return true; - } - - return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate); + /* + * blkverify quits the whole qemu process if there is a mismatch + * between bs->file->bs and s->test_file->bs. Therefore, we + * know that both must match bs and we can recurse down to either. + */ + return bdrv_recurse_can_replace(bs->file->bs, to_replace) || + bdrv_recurse_can_replace(s->test_file->bs, to_replace); } static void blkverify_refresh_filename(BlockDriverState *bs) @@ -327,7 +327,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_co_flush = blkverify_co_flush, .is_filter = true, - .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, + .bdrv_recurse_can_replace = blkverify_recurse_can_replace, }; static void bdrv_blkverify_init(void) diff --git a/block/commit.c b/block/commit.c index 23c90b3b91..8e672799af 100644 --- a/block/commit.c +++ b/block/commit.c @@ -43,27 +43,6 @@ typedef struct CommitBlockJob { char *backing_file_str; } CommitBlockJob; -static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, - int64_t offset, uint64_t bytes, - void *buf) -{ - int ret = 0; - - assert(bytes < SIZE_MAX); - - ret = blk_co_pread(bs, offset, bytes, buf, 0); - if (ret < 0) { - return ret; - } - - ret = blk_co_pwrite(base, offset, bytes, buf, 0); - if (ret < 0) { - return ret; - } - - return 0; -} - static int commit_prepare(Job *job) { CommitBlockJob *s 
= container_of(job, CommitBlockJob, common.job); @@ -140,7 +119,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) int ret = 0; int64_t n = 0; /* bytes */ void *buf = NULL; - int bytes_written = 0; int64_t len, base_len; ret = len = blk_getlength(s->top); @@ -165,6 +143,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) for (offset = 0; offset < len; offset += n) { bool copy; + bool error_in_source = true; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. @@ -179,12 +158,20 @@ static int coroutine_fn commit_run(Job *job, Error **errp) copy = (ret == 1); trace_commit_one_iteration(s, offset, n, ret); if (copy) { - ret = commit_populate(s->top, s->base, offset, n, buf); - bytes_written += n; + assert(n < SIZE_MAX); + + ret = blk_co_pread(s->top, offset, n, buf, 0); + if (ret >= 0) { + ret = blk_co_pwrite(s->base, offset, n, buf, 0); + if (ret < 0) { + error_in_source = false; + } + } } if (ret < 0) { BlockErrorAction action = - block_job_error_action(&s->common, false, s->on_error, -ret); + block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { goto out; } else { diff --git a/block/copy-on-read.c b/block/copy-on-read.c index e95223d3cb..242d3ff055 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -118,13 +118,6 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked) } -static bool cor_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ - return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); -} - - static BlockDriver bdrv_copy_on_read = { .format_name = "copy-on-read", @@ -143,8 +136,6 @@ static BlockDriver bdrv_copy_on_read = { .bdrv_co_block_status = bdrv_co_block_status_from_file, - .bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter, - .has_variable_length = true, .is_filter = true, }; diff --git 
a/block/filter-compress.c b/block/filter-compress.c index 60137fb680..82c315b298 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -128,13 +128,6 @@ static void compress_lock_medium(BlockDriverState *bs, bool locked) } -static bool compress_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ - return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); -} - - static BlockDriver bdrv_compress = { .format_name = "compress", @@ -154,8 +147,6 @@ static BlockDriver bdrv_compress = { .bdrv_co_block_status = bdrv_co_block_status_from_file, - .bdrv_recurse_is_first_non_filter = compress_recurse_is_first_non_filter, - .has_variable_length = true, .is_filter = true, }; diff --git a/block/io_uring.c b/block/io_uring.c index 56892fd1ab..a3142ca989 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -187,7 +187,7 @@ static void luring_process_completions(LuringState *s) ret = 0; } } else { - ret = -ENOSPC;; + ret = -ENOSPC; } } end: diff --git a/block/mirror.c b/block/mirror.c index f0f2d9dff1..447051dbc6 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -103,6 +103,7 @@ struct MirrorOp { bool is_pseudo_op; bool is_active_write; CoQueue waiting_requests; + Coroutine *co; QTAILQ_ENTRY(MirrorOp) next; }; @@ -282,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, } static inline void coroutine_fn -mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) +mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) { MirrorOp *op; QTAILQ_FOREACH(op, &s->ops_in_flight, next) { + if (self == op) { + continue; + } /* Do not wait on pseudo ops, because it may in turn wait on * some other operation to start, which may in fact be the * caller of this function. 
Since there is only one pseudo op @@ -301,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) } static inline void coroutine_fn -mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) { /* Only non-active operations use up in-flight slots */ - mirror_wait_for_any_operation(s, false); + mirror_wait_for_any_operation(s, self, false); } /* Perform a mirror copy operation. @@ -347,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) while (s->buf_free_count < nb_chunks) { trace_mirror_yield_in_flight(s, op->offset, s->in_flight); - mirror_wait_for_free_in_flight_slot(s); + mirror_wait_for_free_in_flight_slot(s, op); } /* Now make a QEMUIOVector taking enough granularity-sized chunks @@ -429,6 +433,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, default: abort(); } + op->co = co; QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); qemu_coroutine_enter(co); @@ -553,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) while (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield_in_flight(s, offset, s->in_flight); - mirror_wait_for_free_in_flight_slot(s); + mirror_wait_for_free_in_flight_slot(s, pseudo_op); } if (s->ret < 0) { @@ -607,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) { while (s->in_flight > 0) { - mirror_wait_for_free_in_flight_slot(s); + mirror_wait_for_free_in_flight_slot(s, NULL); } } @@ -695,7 +700,19 @@ static int mirror_exit_common(Job *job) * drain potential other users of the BDS before changing the graph. */ assert(s->in_drain); bdrv_drained_begin(target_bs); - bdrv_replace_node(to_replace, target_bs, &local_err); + /* + * Cannot use check_to_replace_node() here, because that would + * check for an op blocker on @to_replace, and we have our own + * there. 
+ */ + if (bdrv_recurse_can_replace(src, to_replace)) { + bdrv_replace_node(to_replace, target_bs, &local_err); + } else { + error_setg(&local_err, "Can no longer replace '%s' by '%s', " + "because it can no longer be guaranteed that doing so " + "would not lead to an abrupt change of visible data", + to_replace->node_name, target_bs->node_name); + } bdrv_drained_end(target_bs); if (local_err) { error_report_err(local_err); @@ -792,7 +809,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) if (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, s->in_flight); - mirror_wait_for_free_in_flight_slot(s); + mirror_wait_for_free_in_flight_slot(s, NULL); continue; } @@ -945,7 +962,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) /* Do not start passive operations while there are active * writes in progress */ while (s->in_active_write_counter) { - mirror_wait_for_any_operation(s, true); + mirror_wait_for_any_operation(s, NULL, true); } if (s->ret < 0) { @@ -971,7 +988,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); - mirror_wait_for_free_in_flight_slot(s); + mirror_wait_for_free_in_flight_slot(s, NULL); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index d41f5d049b..8cccc2c9f3 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -647,7 +647,6 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, return bm_list; broken_dir: - ret = -EINVAL; error_setg(errp, "Broken bitmap directory"); fail: diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 1947f13a2d..78c95dfa16 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1026,8 +1026,11 @@ err: void qcow2_alloc_cluster_abort(BlockDriverState *bs, 
QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; - qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, - QCOW2_DISCARD_NEVER); + if (!has_data_file(bs)) { + qcow2_free_clusters(bs, m->alloc_offset, + m->nb_clusters << s->cluster_bits, + QCOW2_DISCARD_NEVER); + } } /* diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index c963bc8de1..7ef1c0e42a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -889,6 +889,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, offset); if (table != NULL) { qcow2_cache_put(s->refcount_block_cache, &refcount_block); + old_table_index = -1; qcow2_cache_discard(s->refcount_block_cache, table); } diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c index 8f5a0d1ebe..77bb578cdf 100644 --- a/block/qcow2-threads.c +++ b/block/qcow2-threads.c @@ -246,12 +246,15 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, .len = len, .func = func, }; + uint64_t sector_size; - assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE)); assert(s->crypto); + sector_size = qcrypto_block_get_sector_size(s->crypto); + assert(QEMU_IS_ALIGNED(guest_offset, sector_size)); + assert(QEMU_IS_ALIGNED(host_offset, sector_size)); + assert(QEMU_IS_ALIGNED(len, sector_size)); + return len == 0 ? 
0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); } @@ -270,7 +273,8 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, * will be written to the underlying storage device at * @host_offset * - * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple) + * @len - length of the buffer (must be a multiple of the encryption + * sector size) * * Depending on the encryption method, @host_offset and/or @guest_offset * may be used for generating the initialization vector for diff --git a/block/qcow2.c b/block/qcow2.c index ef96606f8d..8dcee5efec 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2068,8 +2068,6 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs, goto fail; } - assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); - assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); if (qcow2_co_decrypt(bs, file_cluster_offset + offset_into_cluster(s, offset), offset, buf, bytes) < 0) diff --git a/block/quorum.c b/block/quorum.c index df68adcfaa..6d7a56bd93 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -796,17 +796,53 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) return result; } -static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) +static bool quorum_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) { BDRVQuorumState *s = bs->opaque; int i; for (i = 0; i < s->num_children; i++) { - bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs, - candidate); - if (perm) { - return true; + /* + * We have no idea whether our children show the same data as + * this node (@bs). It is actually highly likely that + * @to_replace does not, because replacing a broken child is + * one of the main use cases here. + * + * We do know that the new BDS will match @bs, so replacing + * any of our children by it will be safe. It cannot change + * the data this quorum node presents to its parents. 
+ * + * However, replacing @to_replace by @bs in any of our + * children's chains may change visible data somewhere in + * there. We therefore cannot recurse down those chains with + * bdrv_recurse_can_replace(). + * (More formally, bdrv_recurse_can_replace() requires that + * @to_replace will be replaced by something matching the @bs + * passed to it. We cannot guarantee that.) + * + * Thus, we can only check whether any of our immediate + * children matches @to_replace. + * + * (In the future, we might add a function to recurse down a + * chain that checks that nothing there cares about a change + * in data from the respective child in question. For + * example, most filters do not care when their child's data + * suddenly changes, as long as their parents do not care.) + */ + if (s->children[i]->bs == to_replace) { + /* + * We now have to ensure that there is no other parent + * that cares about replacing this child by a node with + * potentially different data. + * We do so by checking whether there are any other parents + * at all, which is stricter than necessary, but also very + * simple. (We may decide to implement something more + * complex and permissive when there is an actual need for + * it.) + */ + return QLIST_FIRST(&to_replace->parents) == s->children[i] && + QLIST_NEXT(s->children[i], next_parent) == NULL; } } @@ -1114,6 +1150,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error **errp) return NULL; } +static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + + /* + * We cannot share RESIZE or WRITE, as this would make the + * children differ from each other. 
+ */ + *nshared = (shared & (BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE_UNCHANGED)) + | DEFAULT_PERM_UNCHANGED; +} + static const char *const quorum_strong_runtime_opts[] = { QUORUM_OPT_VOTE_THRESHOLD, QUORUM_OPT_BLKVERIFY, @@ -1143,10 +1196,9 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, - .bdrv_child_perm = bdrv_filter_default_perms, + .bdrv_child_perm = quorum_child_perm, - .is_filter = true, - .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, + .bdrv_recurse_can_replace = quorum_recurse_can_replace, .strong_runtime_opts = quorum_strong_runtime_opts, }; diff --git a/block/replication.c b/block/replication.c index 99532ce521..d6681b6c84 100644 --- a/block/replication.c +++ b/block/replication.c @@ -306,12 +306,6 @@ out: return ret; } -static bool replication_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ - return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); -} - static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) { Error *local_err = NULL; @@ -699,7 +693,6 @@ static BlockDriver bdrv_replication = { .bdrv_co_writev = replication_co_writev, .is_filter = true, - .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter, .has_variable_length = true, .strong_runtime_opts = replication_strong_runtime_opts, diff --git a/block/throttle.c b/block/throttle.c index 0349f42257..71f4bb0ad1 100644 --- a/block/throttle.c +++ b/block/throttle.c @@ -207,12 +207,6 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) reopen_state->opaque = NULL; } -static bool throttle_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ - return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); -} - static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) { ThrottleGroupMember *tgm = bs->opaque; @@ -252,8 +246,6 @@ static BlockDriver bdrv_throttle = { 
.bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes, .bdrv_co_pdiscard = throttle_co_pdiscard, - .bdrv_recurse_is_first_non_filter = throttle_recurse_is_first_non_filter, - .bdrv_attach_aio_context = throttle_attach_aio_context, .bdrv_detach_aio_context = throttle_detach_aio_context, diff --git a/block/vvfat.c b/block/vvfat.c index 019b8f1341..ab800c4887 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3124,17 +3124,10 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, return ret; } -static void write_target_close(BlockDriverState *bs) { - BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); - bdrv_unref_child(s->bs, s->qcow); - g_free(s->qcow_filename); -} - static BlockDriver vvfat_write_target = { .format_name = "vvfat_write_target", .instance_size = sizeof(void*), .bdrv_co_pwritev = write_target_commit, - .bdrv_close = write_target_close, }; static void vvfat_qcow_options(int *child_flags, QDict *child_options, |