aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blkverify.c20
-rw-r--r--block/commit.c37
-rw-r--r--block/copy-on-read.c9
-rw-r--r--block/filter-compress.c9
-rw-r--r--block/io_uring.c2
-rw-r--r--block/mirror.c37
-rw-r--r--block/qcow2-bitmap.c1
-rw-r--r--block/qcow2-cluster.c7
-rw-r--r--block/qcow2-refcount.c1
-rw-r--r--block/qcow2-threads.c12
-rw-r--r--block/qcow2.c2
-rw-r--r--block/quorum.c70
-rw-r--r--block/replication.c7
-rw-r--r--block/throttle.c8
-rw-r--r--block/vvfat.c7
15 files changed, 125 insertions, 104 deletions
diff --git a/block/blkverify.c b/block/blkverify.c
index 304b0a1368..ba6b1853ae 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -268,18 +268,18 @@ static int blkverify_co_flush(BlockDriverState *bs)
return bdrv_co_flush(s->test_file->bs);
}
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
+static bool blkverify_recurse_can_replace(BlockDriverState *bs,
+ BlockDriverState *to_replace)
{
BDRVBlkverifyState *s = bs->opaque;
- bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
- if (perm) {
- return true;
- }
-
- return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
+ /*
+ * blkverify quits the whole qemu process if there is a mismatch
+ * between bs->file->bs and s->test_file->bs. Therefore, we know
+ * know that both must match bs and we can recurse down to either.
+ */
+ return bdrv_recurse_can_replace(bs->file->bs, to_replace) ||
+ bdrv_recurse_can_replace(s->test_file->bs, to_replace);
}
static void blkverify_refresh_filename(BlockDriverState *bs)
@@ -327,7 +327,7 @@ static BlockDriver bdrv_blkverify = {
.bdrv_co_flush = blkverify_co_flush,
.is_filter = true,
- .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
+ .bdrv_recurse_can_replace = blkverify_recurse_can_replace,
};
static void bdrv_blkverify_init(void)
diff --git a/block/commit.c b/block/commit.c
index 23c90b3b91..8e672799af 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -43,27 +43,6 @@ typedef struct CommitBlockJob {
char *backing_file_str;
} CommitBlockJob;
-static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
- int64_t offset, uint64_t bytes,
- void *buf)
-{
- int ret = 0;
-
- assert(bytes < SIZE_MAX);
-
- ret = blk_co_pread(bs, offset, bytes, buf, 0);
- if (ret < 0) {
- return ret;
- }
-
- ret = blk_co_pwrite(base, offset, bytes, buf, 0);
- if (ret < 0) {
- return ret;
- }
-
- return 0;
-}
-
static int commit_prepare(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
@@ -140,7 +119,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
int ret = 0;
int64_t n = 0; /* bytes */
void *buf = NULL;
- int bytes_written = 0;
int64_t len, base_len;
ret = len = blk_getlength(s->top);
@@ -165,6 +143,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
for (offset = 0; offset < len; offset += n) {
bool copy;
+ bool error_in_source = true;
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
@@ -179,12 +158,20 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
copy = (ret == 1);
trace_commit_one_iteration(s, offset, n, ret);
if (copy) {
- ret = commit_populate(s->top, s->base, offset, n, buf);
- bytes_written += n;
+ assert(n < SIZE_MAX);
+
+ ret = blk_co_pread(s->top, offset, n, buf, 0);
+ if (ret >= 0) {
+ ret = blk_co_pwrite(s->base, offset, n, buf, 0);
+ if (ret < 0) {
+ error_in_source = false;
+ }
+ }
}
if (ret < 0) {
BlockErrorAction action =
- block_job_error_action(&s->common, false, s->on_error, -ret);
+ block_job_error_action(&s->common, s->on_error,
+ error_in_source, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT) {
goto out;
} else {
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e95223d3cb..242d3ff055 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -118,13 +118,6 @@ static void cor_lock_medium(BlockDriverState *bs, bool locked)
}
-static bool cor_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
-{
- return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
static BlockDriver bdrv_copy_on_read = {
.format_name = "copy-on-read",
@@ -143,8 +136,6 @@ static BlockDriver bdrv_copy_on_read = {
.bdrv_co_block_status = bdrv_co_block_status_from_file,
- .bdrv_recurse_is_first_non_filter = cor_recurse_is_first_non_filter,
-
.has_variable_length = true,
.is_filter = true,
};
diff --git a/block/filter-compress.c b/block/filter-compress.c
index 60137fb680..82c315b298 100644
--- a/block/filter-compress.c
+++ b/block/filter-compress.c
@@ -128,13 +128,6 @@ static void compress_lock_medium(BlockDriverState *bs, bool locked)
}
-static bool compress_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
-{
- return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
-
static BlockDriver bdrv_compress = {
.format_name = "compress",
@@ -154,8 +147,6 @@ static BlockDriver bdrv_compress = {
.bdrv_co_block_status = bdrv_co_block_status_from_file,
- .bdrv_recurse_is_first_non_filter = compress_recurse_is_first_non_filter,
-
.has_variable_length = true,
.is_filter = true,
};
diff --git a/block/io_uring.c b/block/io_uring.c
index 56892fd1ab..a3142ca989 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -187,7 +187,7 @@ static void luring_process_completions(LuringState *s)
ret = 0;
}
} else {
- ret = -ENOSPC;;
+ ret = -ENOSPC;
}
}
end:
diff --git a/block/mirror.c b/block/mirror.c
index f0f2d9dff1..447051dbc6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -103,6 +103,7 @@ struct MirrorOp {
bool is_pseudo_op;
bool is_active_write;
CoQueue waiting_requests;
+ Coroutine *co;
QTAILQ_ENTRY(MirrorOp) next;
};
@@ -282,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset,
}
static inline void coroutine_fn
-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
+mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active)
{
MirrorOp *op;
QTAILQ_FOREACH(op, &s->ops_in_flight, next) {
+ if (self == op) {
+ continue;
+ }
/* Do not wait on pseudo ops, because it may in turn wait on
* some other operation to start, which may in fact be the
* caller of this function. Since there is only one pseudo op
@@ -301,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active)
}
static inline void coroutine_fn
-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s)
+mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self)
{
/* Only non-active operations use up in-flight slots */
- mirror_wait_for_any_operation(s, false);
+ mirror_wait_for_any_operation(s, self, false);
}
/* Perform a mirror copy operation.
@@ -347,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque)
while (s->buf_free_count < nb_chunks) {
trace_mirror_yield_in_flight(s, op->offset, s->in_flight);
- mirror_wait_for_free_in_flight_slot(s);
+ mirror_wait_for_free_in_flight_slot(s, op);
}
/* Now make a QEMUIOVector taking enough granularity-sized chunks
@@ -429,6 +433,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset,
default:
abort();
}
+ op->co = co;
QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next);
qemu_coroutine_enter(co);
@@ -553,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
while (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield_in_flight(s, offset, s->in_flight);
- mirror_wait_for_free_in_flight_slot(s);
+ mirror_wait_for_free_in_flight_slot(s, pseudo_op);
}
if (s->ret < 0) {
@@ -607,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s)
static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- mirror_wait_for_free_in_flight_slot(s);
+ mirror_wait_for_free_in_flight_slot(s, NULL);
}
}
@@ -695,7 +700,19 @@ static int mirror_exit_common(Job *job)
* drain potential other users of the BDS before changing the graph. */
assert(s->in_drain);
bdrv_drained_begin(target_bs);
- bdrv_replace_node(to_replace, target_bs, &local_err);
+ /*
+ * Cannot use check_to_replace_node() here, because that would
+ * check for an op blocker on @to_replace, and we have our own
+ * there.
+ */
+ if (bdrv_recurse_can_replace(src, to_replace)) {
+ bdrv_replace_node(to_replace, target_bs, &local_err);
+ } else {
+ error_setg(&local_err, "Can no longer replace '%s' by '%s', "
+ "because it can no longer be guaranteed that doing so "
+ "would not lead to an abrupt change of visible data",
+ to_replace->node_name, target_bs->node_name);
+ }
bdrv_drained_end(target_bs);
if (local_err) {
error_report_err(local_err);
@@ -792,7 +809,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
if (s->in_flight >= MAX_IN_FLIGHT) {
trace_mirror_yield(s, UINT64_MAX, s->buf_free_count,
s->in_flight);
- mirror_wait_for_free_in_flight_slot(s);
+ mirror_wait_for_free_in_flight_slot(s, NULL);
continue;
}
@@ -945,7 +962,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
/* Do not start passive operations while there are active
* writes in progress */
while (s->in_active_write_counter) {
- mirror_wait_for_any_operation(s, true);
+ mirror_wait_for_any_operation(s, NULL, true);
}
if (s->ret < 0) {
@@ -971,7 +988,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
- mirror_wait_for_free_in_flight_slot(s);
+ mirror_wait_for_free_in_flight_slot(s, NULL);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index d41f5d049b..8cccc2c9f3 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -647,7 +647,6 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset,
return bm_list;
broken_dir:
- ret = -EINVAL;
error_setg(errp, "Broken bitmap directory");
fail:
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1947f13a2d..78c95dfa16 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1026,8 +1026,11 @@ err:
void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcow2State *s = bs->opaque;
- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits,
- QCOW2_DISCARD_NEVER);
+ if (!has_data_file(bs)) {
+ qcow2_free_clusters(bs, m->alloc_offset,
+ m->nb_clusters << s->cluster_bits,
+ QCOW2_DISCARD_NEVER);
+ }
}
/*
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c963bc8de1..7ef1c0e42a 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -889,6 +889,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
offset);
if (table != NULL) {
qcow2_cache_put(s->refcount_block_cache, &refcount_block);
+ old_table_index = -1;
qcow2_cache_discard(s->refcount_block_cache, table);
}
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
index 8f5a0d1ebe..77bb578cdf 100644
--- a/block/qcow2-threads.c
+++ b/block/qcow2-threads.c
@@ -246,12 +246,15 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
.len = len,
.func = func,
};
+ uint64_t sector_size;
- assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE));
- assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE));
- assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE));
assert(s->crypto);
+ sector_size = qcrypto_block_get_sector_size(s->crypto);
+ assert(QEMU_IS_ALIGNED(guest_offset, sector_size));
+ assert(QEMU_IS_ALIGNED(host_offset, sector_size));
+ assert(QEMU_IS_ALIGNED(len, sector_size));
+
return len == 0 ? 0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg);
}
@@ -270,7 +273,8 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset,
* will be written to the underlying storage device at
* @host_offset
*
- * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple)
+ * @len - length of the buffer (must be a multiple of the encryption
+ * sector size)
*
* Depending on the encryption method, @host_offset and/or @guest_offset
* may be used for generating the initialization vector for
diff --git a/block/qcow2.c b/block/qcow2.c
index ef96606f8d..8dcee5efec 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2068,8 +2068,6 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs,
goto fail;
}
- assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
- assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
if (qcow2_co_decrypt(bs,
file_cluster_offset + offset_into_cluster(s, offset),
offset, buf, bytes) < 0)
diff --git a/block/quorum.c b/block/quorum.c
index df68adcfaa..6d7a56bd93 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -796,17 +796,53 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
return result;
}
-static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
+static bool quorum_recurse_can_replace(BlockDriverState *bs,
+ BlockDriverState *to_replace)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
- bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
- candidate);
- if (perm) {
- return true;
+ /*
+ * We have no idea whether our children show the same data as
+ * this node (@bs). It is actually highly likely that
+ * @to_replace does not, because replacing a broken child is
+ * one of the main use cases here.
+ *
+ * We do know that the new BDS will match @bs, so replacing
+ * any of our children by it will be safe. It cannot change
+ * the data this quorum node presents to its parents.
+ *
+ * However, replacing @to_replace by @bs in any of our
+ * children's chains may change visible data somewhere in
+ * there. We therefore cannot recurse down those chains with
+ * bdrv_recurse_can_replace().
+ * (More formally, bdrv_recurse_can_replace() requires that
+ * @to_replace will be replaced by something matching the @bs
+ * passed to it. We cannot guarantee that.)
+ *
+ * Thus, we can only check whether any of our immediate
+ * children matches @to_replace.
+ *
+ * (In the future, we might add a function to recurse down a
+ * chain that checks that nothing there cares about a change
+ * in data from the respective child in question. For
+ * example, most filters do not care when their child's data
+ * suddenly changes, as long as their parents do not care.)
+ */
+ if (s->children[i]->bs == to_replace) {
+ /*
+ * We now have to ensure that there is no other parent
+ * that cares about replacing this child by a node with
+ * potentially different data.
+ * We do so by checking whether there are any other parents
+ * at all, which is stricter than necessary, but also very
+ * simple. (We may decide to implement something more
+ * complex and permissive when there is an actual need for
+ * it.)
+ */
+ return QLIST_FIRST(&to_replace->parents) == s->children[i] &&
+ QLIST_NEXT(s->children[i], next_parent) == NULL;
}
}
@@ -1114,6 +1150,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error **errp)
return NULL;
}
+static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c,
+ const BdrvChildRole *role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+{
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
+
+ /*
+ * We cannot share RESIZE or WRITE, as this would make the
+ * children differ from each other.
+ */
+ *nshared = (shared & (BLK_PERM_CONSISTENT_READ |
+ BLK_PERM_WRITE_UNCHANGED))
+ | DEFAULT_PERM_UNCHANGED;
+}
+
static const char *const quorum_strong_runtime_opts[] = {
QUORUM_OPT_VOTE_THRESHOLD,
QUORUM_OPT_BLKVERIFY,
@@ -1143,10 +1196,9 @@ static BlockDriver bdrv_quorum = {
.bdrv_add_child = quorum_add_child,
.bdrv_del_child = quorum_del_child,
- .bdrv_child_perm = bdrv_filter_default_perms,
+ .bdrv_child_perm = quorum_child_perm,
- .is_filter = true,
- .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+ .bdrv_recurse_can_replace = quorum_recurse_can_replace,
.strong_runtime_opts = quorum_strong_runtime_opts,
};
diff --git a/block/replication.c b/block/replication.c
index 99532ce521..d6681b6c84 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -306,12 +306,6 @@ out:
return ret;
}
-static bool replication_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
-{
- return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp)
{
Error *local_err = NULL;
@@ -699,7 +693,6 @@ static BlockDriver bdrv_replication = {
.bdrv_co_writev = replication_co_writev,
.is_filter = true,
- .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter,
.has_variable_length = true,
.strong_runtime_opts = replication_strong_runtime_opts,
diff --git a/block/throttle.c b/block/throttle.c
index 0349f42257..71f4bb0ad1 100644
--- a/block/throttle.c
+++ b/block/throttle.c
@@ -207,12 +207,6 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
reopen_state->opaque = NULL;
}
-static bool throttle_recurse_is_first_non_filter(BlockDriverState *bs,
- BlockDriverState *candidate)
-{
- return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-}
-
static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
{
ThrottleGroupMember *tgm = bs->opaque;
@@ -252,8 +246,6 @@ static BlockDriver bdrv_throttle = {
.bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes,
.bdrv_co_pdiscard = throttle_co_pdiscard,
- .bdrv_recurse_is_first_non_filter = throttle_recurse_is_first_non_filter,
-
.bdrv_attach_aio_context = throttle_attach_aio_context,
.bdrv_detach_aio_context = throttle_detach_aio_context,
diff --git a/block/vvfat.c b/block/vvfat.c
index 019b8f1341..ab800c4887 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -3124,17 +3124,10 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return ret;
}
-static void write_target_close(BlockDriverState *bs) {
- BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
- bdrv_unref_child(s->bs, s->qcow);
- g_free(s->qcow_filename);
-}
-
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
.instance_size = sizeof(void*),
.bdrv_co_pwritev = write_target_commit,
- .bdrv_close = write_target_close,
};
static void vvfat_qcow_options(int *child_flags, QDict *child_options,