aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
author Stefan Hajnoczi <stefanha@redhat.com> 2023-09-21 09:31:27 -0400
committer Stefan Hajnoczi <stefanha@redhat.com> 2023-09-21 09:31:28 -0400
commit 3da71a2111de9f0bc475f8292d009265ab34365f (patch)
tree 102ffb4b18cea24d8b75a0d7f22f0eea14d3ba8b /block
parent f2df7e7705e832a8a65422c227e9ef1bdac226c1 (diff)
parent c428b392590df6364a025d5841e3e8a589ebfd4a (diff)
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches - Graph locking part 4 (node management) - qemu-img map: report compressed data blocks - block-backend: process I/O in the current AioContext # -----BEGIN PGP SIGNATURE----- # # iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmULHnURHGt3b2xmQHJl # ZGhhdC5jb20ACgkQfwmycsiPL9aB5hAAqH8To7WIUtg1rj1PY809ck78ghm18PKg # TNdN7IbrXQghX5foh2VgPwVVl+JaW2CSrJYWQcAO6AbvFduNIi9iKzI6RT0xKXpb # b8oQXS7zntFzwBv8ohOU5NSVJOgVmNP4h5qJIMmXgB9ZcLFG40zggVH2qQT7guUf # 9MAc81kI/d5vvSHY0ZjdHjNOgwG4q1j8yytL7OFqWUfB8sXloUCA9lT7w4jIYD8L # v2StUOLWB01Zts2o8SCNaFxuajs6wUee8b/DM1cyPyLy4KtOdXvLKhq2NlXpLo2i # aZFr4PtizTVwrQZIJttA9jqM+QCsDOsiSat3BLNNsKUaCWHZB0rOGLCzMCtisyOo # 4PzuL4UI21ik2zieO1qVM+Thqvw16kHtp6dD9pGk4X4ogGreGYEIxzBl79luR+AV # NCRizoeFWTHKymS1tSoKrWT9ZNHcLmwemO6Tt1rMYk9jV3T4uY5e1NwxaUavEfsX # f8dLfQjhNiySOoDknT1OSerBOVdTXURS2ri5H3GZxrxvJ4jOeFkn52C8r3YlZ3Wp # Cr9LCUJZeXgwY+Q1JQ3D4VLY8aZ83txpw6XKEy0eTEv5wxkBj5LWhXx7hNb5F3lg # bqaRYijVJn+P82wVxlftIzMfNeVBFHzFE90taPV5grJjr8lgrGBFmD7Puc97kfDX # oTDBwRxJeew= # =qTNA # -----END PGP SIGNATURE----- # gpg: Signature made Wed 20 Sep 2023 12:31:49 EDT # gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6 # gpg: issuer "kwolf@redhat.com" # gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full] # Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6 * tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (28 commits) block: mark aio_poll as non-coroutine block-backend: process zoned requests in the current AioContext block-backend: process I/O in the current AioContext test-bdrv-drain: avoid race with BH in IOThread drain test block: remove AIOCBInfo->get_aio_context() qemu-img: map: report compressed data blocks block: add BDRV_BLOCK_COMPRESSED flag for bdrv_block_status() block: Mark bdrv_add/del_child() and caller GRAPH_WRLOCK block: Mark bdrv_unref_child() GRAPH_WRLOCK block: Mark bdrv_root_unref_child() GRAPH_WRLOCK block: Take graph rdlock in bdrv_change_aio_context() block: Take graph 
rdlock in bdrv_drop_intermediate() block: Mark bdrv_parent_cb_change_media() GRAPH_RDLOCK block: Mark bdrv_child_perm() GRAPH_RDLOCK block: Mark bdrv_get_cumulative_perm() and callers GRAPH_RDLOCK block: Mark bdrv_parent_perms_conflict() and callers GRAPH_RDLOCK block: Mark bdrv_attach_child() GRAPH_WRLOCK block: Call transaction callbacks with lock held block: Mark bdrv_attach_child_common() GRAPH_WRLOCK block: Mark bdrv_replace_child_tran() GRAPH_WRLOCK ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- block/blklogwrites.c 4
-rw-r--r-- block/blkverify.c 2
-rw-r--r-- block/block-backend.c 64
-rw-r--r-- block/copy-before-write.c 10
-rw-r--r-- block/crypto.c 6
-rw-r--r-- block/graph-lock.c 26
-rw-r--r-- block/io.c 23
-rw-r--r-- block/mirror.c 8
-rw-r--r-- block/preallocate.c 133
-rw-r--r-- block/qcow.c 5
-rw-r--r-- block/qcow2.c 7
-rw-r--r-- block/quorum.c 23
-rw-r--r-- block/replication.c 9
-rw-r--r-- block/snapshot.c 2
-rw-r--r-- block/stream.c 20
-rw-r--r-- block/vmdk.c 15
16 files changed, 232 insertions, 125 deletions
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index 3ea7141cb5..a0d70729bb 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -251,7 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
ret = 0;
fail_log:
if (ret < 0) {
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->log_file);
+ bdrv_graph_wrunlock();
s->log_file = NULL;
}
fail:
@@ -263,8 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs)
{
BDRVBlkLogWritesState *s = bs->opaque;
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->log_file);
s->log_file = NULL;
+ bdrv_graph_wrunlock();
}
static int64_t coroutine_fn GRAPH_RDLOCK
diff --git a/block/blkverify.c b/block/blkverify.c
index 7326461f30..dae9716a26 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -151,8 +151,10 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
+ bdrv_graph_wrunlock();
}
static int64_t coroutine_fn GRAPH_RDLOCK
diff --git a/block/block-backend.c b/block/block-backend.c
index 47d360c97a..efe2e7cbf8 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -33,8 +33,6 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
typedef struct BlockBackendAioNotifier {
void (*attached_aio_context)(AioContext *new_context, void *opaque);
void (*detach_aio_context)(void *opaque);
@@ -103,7 +101,6 @@ typedef struct BlockBackendAIOCB {
} BlockBackendAIOCB;
static const AIOCBInfo block_backend_aiocb_info = {
- .get_aio_context = blk_aiocb_get_aio_context,
.aiocb_size = sizeof(BlockBackendAIOCB),
};
@@ -121,6 +118,10 @@ static QTAILQ_HEAD(, BlockBackend) block_backends =
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+
static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
@@ -186,7 +187,7 @@ static void blk_vm_state_changed(void *opaque, bool running, RunState state)
*
* If an error is returned, the VM cannot be allowed to be resumed.
*/
-static void blk_root_activate(BdrvChild *child, Error **errp)
+static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp)
{
BlockBackend *blk = child->opaque;
Error *local_err = NULL;
@@ -207,7 +208,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
*/
saved_shared_perm = blk->shared_perm;
- blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
+ blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk->disable_perm = true;
@@ -226,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
return;
}
- blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
+ blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err);
if (local_err) {
error_propagate(errp, local_err);
blk->disable_perm = true;
@@ -259,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk)
return blk->force_allow_inactivate;
}
-static int blk_root_inactivate(BdrvChild *child)
+static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
@@ -911,7 +912,10 @@ void blk_remove_bs(BlockBackend *blk)
blk_drain(blk);
root = blk->root;
blk->root = NULL;
+
+ bdrv_graph_wrlock(NULL);
bdrv_root_unref_child(root);
+ bdrv_graph_wrunlock();
}
/*
@@ -953,8 +957,9 @@ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
/*
* Sets the permission bitmasks that the user of the BlockBackend needs.
*/
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
- Error **errp)
+static int coroutine_mixed_fn GRAPH_RDLOCK
+blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
{
int ret;
GLOBAL_STATE_CODE();
@@ -972,6 +977,15 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
return 0;
}
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp)
+{
+ GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ return blk_set_perm_locked(blk, perm, shared_perm, errp);
+}
+
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
GLOBAL_STATE_CODE();
@@ -1533,7 +1547,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
acb->blk = blk;
acb->ret = ret;
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
error_callback_bh, acb);
return &acb->common;
}
@@ -1545,16 +1559,8 @@ typedef struct BlkAioEmAIOCB {
bool has_returned;
} BlkAioEmAIOCB;
-static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
-{
- BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
-
- return blk_get_aio_context(acb->rwco.blk);
-}
-
static const AIOCBInfo blk_aio_em_aiocb_info = {
.aiocb_size = sizeof(BlkAioEmAIOCB),
- .get_aio_context = blk_aio_em_aiocb_get_aio_context,
};
static void blk_aio_complete(BlkAioEmAIOCB *acb)
@@ -1595,11 +1601,11 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
acb->has_returned = false;
co = qemu_coroutine_create(co_entry, acb);
- aio_co_enter(blk_get_aio_context(blk), co);
+ aio_co_enter(qemu_get_current_aio_context(), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
blk_aio_complete_bh, acb);
}
@@ -1901,11 +1907,11 @@ BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
- aio_co_enter(blk_get_aio_context(blk), co);
+ aio_co_enter(qemu_get_current_aio_context(), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
blk_aio_complete_bh, acb);
}
@@ -1942,11 +1948,11 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
- aio_co_enter(blk_get_aio_context(blk), co);
+ aio_co_enter(qemu_get_current_aio_context(), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
blk_aio_complete_bh, acb);
}
@@ -1982,10 +1988,10 @@ BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
- aio_co_enter(blk_get_aio_context(blk), co);
+ aio_co_enter(qemu_get_current_aio_context(), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(),
blk_aio_complete_bh, acb);
}
@@ -2434,12 +2440,6 @@ AioContext *blk_get_aio_context(BlockBackend *blk)
return blk->ctx;
}
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
- BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
- return blk_get_aio_context(blk_acb->blk);
-}
-
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp)
{
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 9a0e2b69d9..aeaff3bb82 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -341,11 +341,11 @@ static void cbw_refresh_filename(BlockDriverState *bs)
bs->file->bs->filename);
}
-static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role,
- BlockReopenQueue *reopen_queue,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared)
+static void GRAPH_RDLOCK
+cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
{
if (!(role & BDRV_CHILD_FILTERED)) {
/*
diff --git a/block/crypto.c b/block/crypto.c
index 6ee8d46d30..c9c9a39fa3 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -777,7 +777,7 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
return spec_info;
}
-static int
+static int GRAPH_RDLOCK
block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
@@ -793,7 +793,7 @@ block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
return ret;
}
-static void
+static void GRAPH_RDLOCK
block_crypto_amend_cleanup(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
@@ -841,6 +841,8 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
QCryptoBlockAmendOptions *amend_options = NULL;
int ret = -EINVAL;
+ assume_graph_lock(); /* FIXME */
+
assert(crypto);
assert(crypto->block);
diff --git a/block/graph-lock.c b/block/graph-lock.c
index f357a2c0b1..58a799065f 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -163,17 +163,29 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
- QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
+ WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) {
+ /*
+ * No need for memory barriers, this works in pair with
+ * the slow path of rdlock() and both take the lock.
+ */
+ qatomic_store_release(&has_writer, 0);
+
+ /* Wake up all coroutines that are waiting to read the graph */
+ qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+ }
+
/*
- * No need for memory barriers, this works in pair with
- * the slow path of rdlock() and both take the lock.
+ * Run any BHs that were scheduled during the wrlock section and that
+ * callers might expect to have finished (in particular, this is important
+ * for bdrv_schedule_unref()).
+ *
+ * Do this only after restarting coroutines so that nested event loops in
+ * BHs don't deadlock if their condition relies on the coroutine making
+ * progress.
*/
- qatomic_store_release(&has_writer, 0);
-
- /* Wake up all coroutine that are waiting to read the graph */
- qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
+ aio_bh_poll(qemu_get_aio_context());
}
void coroutine_fn bdrv_graph_co_rdlock(void)
diff --git a/block/io.c b/block/io.c
index ba23a9bcd3..209a6da0c8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2950,25 +2950,18 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
/**************************************************************/
/* async I/Os */
+/**
+ * Synchronously cancels an acb. Must be called with the BQL held and the acb
+ * must be processed with the BQL held too (IOThreads are not allowed).
+ *
+ * Use bdrv_aio_cancel_async() instead when possible.
+ */
void bdrv_aio_cancel(BlockAIOCB *acb)
{
- IO_CODE();
+ GLOBAL_STATE_CODE();
qemu_aio_ref(acb);
bdrv_aio_cancel_async(acb);
- while (acb->refcnt > 1) {
- if (acb->aiocb_info->get_aio_context) {
- aio_poll(acb->aiocb_info->get_aio_context(acb), true);
- } else if (acb->bs) {
- /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so
- * assert that we're not using an I/O thread. Thread-safe
- * code should use bdrv_aio_cancel_async exclusively.
- */
- assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context());
- aio_poll(bdrv_get_aio_context(acb->bs), true);
- } else {
- abort();
- }
- }
+ AIO_WAIT_WHILE_UNLOCKED(NULL, acb->refcnt > 1);
qemu_aio_unref(acb);
}
diff --git a/block/mirror.c b/block/mirror.c
index aae4bebbb6..3cc0757a03 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -702,8 +702,12 @@ static int mirror_exit_common(Job *job)
* mirror_top_bs from now on, so keep it drained. */
bdrv_drained_begin(mirror_top_bs);
bs_opaque->stop = true;
+
+ bdrv_graph_rdlock_main_loop();
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
+ bdrv_graph_rdunlock_main_loop();
+
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
@@ -1670,6 +1674,8 @@ static BlockJob *mirror_start_job(
uint64_t target_perms, target_shared_perms;
int ret;
+ GLOBAL_STATE_CODE();
+
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
}
@@ -1906,8 +1912,10 @@ fail:
}
bs_opaque->stop = true;
+ bdrv_graph_rdlock_main_loop();
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort);
+ bdrv_graph_rdunlock_main_loop();
bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
bdrv_unref(mirror_top_bs);
diff --git a/block/preallocate.c b/block/preallocate.c
index 3d0f621003..bfb638d8b1 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -75,8 +75,14 @@ typedef struct BDRVPreallocateState {
* be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and
* BLK_PERM_WRITE permissions on file child.
*/
+
+ /* Gives up the resize permission on children when parents don't need it */
+ QEMUBH *drop_resize_bh;
} BDRVPreallocateState;
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp);
+static void preallocate_drop_resize_bh(void *opaque);
+
#define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align"
#define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size"
static QemuOptsList runtime_opts = {
@@ -142,6 +148,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
* For this to work, mark them invalid.
*/
s->file_end = s->zero_start = s->data_end = -EINVAL;
+ s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs);
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
@@ -162,26 +169,42 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
}
-static void preallocate_close(BlockDriverState *bs)
+static int preallocate_truncate_to_real_size(BlockDriverState *bs, Error **errp)
{
- int ret;
BDRVPreallocateState *s = bs->opaque;
-
- if (s->data_end < 0) {
- return;
- }
+ int ret;
if (s->file_end < 0) {
s->file_end = bdrv_getlength(bs->file->bs);
if (s->file_end < 0) {
- return;
+ error_setg_errno(errp, -s->file_end, "Failed to get file length");
+ return s->file_end;
}
}
if (s->data_end < s->file_end) {
ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0,
NULL);
- s->file_end = ret < 0 ? ret : s->data_end;
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to drop preallocation");
+ s->file_end = ret;
+ return ret;
+ }
+ s->file_end = s->data_end;
+ }
+
+ return 0;
+}
+
+static void preallocate_close(BlockDriverState *bs)
+{
+ BDRVPreallocateState *s = bs->opaque;
+
+ qemu_bh_cancel(s->drop_resize_bh);
+ qemu_bh_delete(s->drop_resize_bh);
+
+ if (s->data_end >= 0) {
+ preallocate_truncate_to_real_size(bs, NULL);
}
}
@@ -198,6 +221,7 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
PreallocateOpts *opts = g_new0(PreallocateOpts, 1);
+ int ret;
if (!preallocate_absorb_opts(opts, reopen_state->options,
reopen_state->bs->file->bs, errp)) {
@@ -205,6 +229,19 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state,
return -EINVAL;
}
+ /*
+ * Drop the preallocation already here if reopening read-only. The child
+ * might also be reopened read-only and then scheduling a BH during the
+ * permission update is too late.
+ */
+ if ((reopen_state->flags & BDRV_O_RDWR) == 0) {
+ ret = preallocate_drop_resize(reopen_state->bs, errp);
+ if (ret < 0) {
+ g_free(opts);
+ return ret;
+ }
+ }
+
reopen_state->opaque = opts;
return 0;
@@ -462,58 +499,61 @@ preallocate_co_getlength(BlockDriverState *bs)
return ret;
}
-static int preallocate_check_perm(BlockDriverState *bs,
- uint64_t perm, uint64_t shared, Error **errp)
+static int preallocate_drop_resize(BlockDriverState *bs, Error **errp)
{
BDRVPreallocateState *s = bs->opaque;
+ int ret;
- if (s->data_end >= 0 && !can_write_resize(perm)) {
- /*
- * Lose permissions.
- * We should truncate in check_perm, as in set_perm bs->file->perm will
- * be already changed, and we should not violate it.
- */
- if (s->file_end < 0) {
- s->file_end = bdrv_getlength(bs->file->bs);
- if (s->file_end < 0) {
- error_setg(errp, "Failed to get file length");
- return s->file_end;
- }
- }
+ if (s->data_end < 0) {
+ return 0;
+ }
- if (s->data_end < s->file_end) {
- int ret = bdrv_truncate(bs->file, s->data_end, true,
- PREALLOC_MODE_OFF, 0, NULL);
- if (ret < 0) {
- error_setg(errp, "Failed to drop preallocation");
- s->file_end = ret;
- return ret;
- }
- s->file_end = s->data_end;
- }
+ /*
+ * Before switching children to be read-only, truncate them to remove
+ * the preallocation and let them have the real size.
+ */
+ ret = preallocate_truncate_to_real_size(bs, errp);
+ if (ret < 0) {
+ return ret;
}
+ /*
+ * We'll drop our permissions and will allow other users to take write and
+ * resize permissions (see preallocate_child_perm). Anyone will be able to
+ * change the child, so mark all states invalid. We'll regain control if a
+ * parent requests write access again.
+ */
+ s->data_end = s->file_end = s->zero_start = -EINVAL;
+
+ bdrv_graph_rdlock_main_loop();
+ bdrv_child_refresh_perms(bs, bs->file, NULL);
+ bdrv_graph_rdunlock_main_loop();
+
return 0;
}
+static void preallocate_drop_resize_bh(void *opaque)
+{
+ /*
+ * In case of errors, we'll simply keep the exclusive lock on the image
+ * indefinitely.
+ */
+ preallocate_drop_resize(opaque, NULL);
+}
+
static void preallocate_set_perm(BlockDriverState *bs,
uint64_t perm, uint64_t shared)
{
BDRVPreallocateState *s = bs->opaque;
if (can_write_resize(perm)) {
+ qemu_bh_cancel(s->drop_resize_bh);
if (s->data_end < 0) {
s->data_end = s->file_end = s->zero_start =
- bdrv_getlength(bs->file->bs);
+ bs->file->bs->total_sectors * BDRV_SECTOR_SIZE;
}
} else {
- /*
- * We drop our permissions, as well as allow shared
- * permissions (see preallocate_child_perm), anyone will be able to
- * change the child, so mark all states invalid. We'll regain control if
- * get good permissions back.
- */
- s->data_end = s->file_end = s->zero_start = -EINVAL;
+ qemu_bh_schedule(s->drop_resize_bh);
}
}
@@ -521,10 +561,16 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c,
BdrvChildRole role, BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared)
{
+ BDRVPreallocateState *s = bs->opaque;
+
bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared);
- if (can_write_resize(perm)) {
- /* This should come by default, but let's enforce: */
+ /*
+ * We need exclusive write and resize permissions on the child not only when
+ * the parent can write to it, but also after the parent gave up write
+ * permissions until preallocate_drop_resize() has completed.
+ */
+ if (can_write_resize(perm) || s->data_end != -EINVAL) {
*nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
/*
@@ -554,7 +600,6 @@ static BlockDriver bdrv_preallocate_filter = {
.bdrv_co_flush = preallocate_co_flush,
.bdrv_co_truncate = preallocate_co_truncate,
- .bdrv_check_perm = preallocate_check_perm,
.bdrv_set_perm = preallocate_set_perm,
.bdrv_child_perm = preallocate_child_perm,
diff --git a/block/qcow.c b/block/qcow.c
index 577bd70324..d56d24ab6d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -549,7 +549,10 @@ qcow_co_block_status(BlockDriverState *bs, bool want_zero,
if (!cluster_offset) {
return 0;
}
- if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) {
+ if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_COMPRESSED;
+ }
+ if (s->crypto) {
return BDRV_BLOCK_DATA;
}
*map = cluster_offset | index_in_cluster;
diff --git a/block/qcow2.c b/block/qcow2.c
index b48cd9ce63..af43d59d76 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1880,7 +1880,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
g_free(s->image_data_file);
if (open_data_file && has_data_file(bs)) {
bdrv_graph_co_rdunlock();
- bdrv_unref_child(bs, s->data_file);
+ bdrv_co_unref_child(bs, s->data_file);
bdrv_graph_co_rdlock();
s->data_file = NULL;
}
@@ -2162,6 +2162,9 @@ qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
{
status |= BDRV_BLOCK_RECURSE;
}
+ if (type == QCOW2_SUBCLUSTER_COMPRESSED) {
+ status |= BDRV_BLOCK_COMPRESSED;
+ }
return status;
}
@@ -2790,7 +2793,9 @@ static void qcow2_do_close(BlockDriverState *bs, bool close_data_file)
g_free(s->image_backing_format);
if (close_data_file && has_data_file(bs)) {
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->data_file);
+ bdrv_graph_wrunlock();
s->data_file = NULL;
}
diff --git a/block/quorum.c b/block/quorum.c
index f28758cf2b..05220cab7f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1037,12 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
close_exit:
/* cleanup on error */
+ bdrv_graph_wrlock(NULL);
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
continue;
}
bdrv_unref_child(bs, s->children[i]);
}
+ bdrv_graph_wrunlock();
g_free(s->children);
g_free(opened);
exit:
@@ -1055,15 +1057,17 @@ static void quorum_close(BlockDriverState *bs)
BDRVQuorumState *s = bs->opaque;
int i;
+ bdrv_graph_wrlock(NULL);
for (i = 0; i < s->num_children; i++) {
bdrv_unref_child(bs, s->children[i]);
}
+ bdrv_graph_wrunlock();
g_free(s->children);
}
-static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
- Error **errp)
+static void GRAPH_WRLOCK
+quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp)
{
BDRVQuorumState *s = bs->opaque;
BdrvChild *child;
@@ -1089,8 +1093,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
}
s->next_child_index++;
- bdrv_drained_begin(bs);
-
/* We can safely add the child now */
bdrv_ref(child_bs);
@@ -1098,18 +1100,15 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
BDRV_CHILD_DATA, errp);
if (child == NULL) {
s->next_child_index--;
- goto out;
+ return;
}
s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
s->children[s->num_children++] = child;
quorum_refresh_flags(bs);
-
-out:
- bdrv_drained_end(bs);
}
-static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
- Error **errp)
+static void GRAPH_WRLOCK
+quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp)
{
BDRVQuorumState *s = bs->opaque;
char indexstr[INDEXSTR_LEN];
@@ -1139,16 +1138,14 @@ static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
s->next_child_index--;
}
- bdrv_drained_begin(bs);
-
/* We can safely remove this child now */
memmove(&s->children[i], &s->children[i + 1],
(s->num_children - i - 1) * sizeof(BdrvChild *));
s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+
bdrv_unref_child(bs, child);
quorum_refresh_flags(bs);
- bdrv_drained_end(bs);
}
static void quorum_gather_child_options(BlockDriverState *bs, QDict *target,
diff --git a/block/replication.c b/block/replication.c
index ea4bf1aa80..dd166d2d82 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -542,12 +542,15 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
return;
}
+ bdrv_graph_wrlock(bs);
+
bdrv_ref(hidden_disk->bs);
s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
&child_of_bds, BDRV_CHILD_DATA,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
+ bdrv_graph_wrunlock();
aio_context_release(aio_context);
return;
}
@@ -558,10 +561,13 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
BDRV_CHILD_DATA, &local_err);
if (local_err) {
error_propagate(errp, local_err);
+ bdrv_graph_wrunlock();
aio_context_release(aio_context);
return;
}
+ bdrv_graph_wrunlock();
+
/* start backup job now */
error_setg(&s->blocker,
"Block device is in use by internal backup job");
@@ -666,10 +672,13 @@ static void replication_done(void *opaque, int ret)
if (ret == 0) {
s->stage = BLOCK_REPLICATION_DONE;
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, s->secondary_disk);
s->secondary_disk = NULL;
bdrv_unref_child(bs, s->hidden_disk);
s->hidden_disk = NULL;
+ bdrv_graph_wrunlock();
+
s->error = 0;
} else {
s->stage = BLOCK_REPLICATION_FAILOVER_FAILED;
diff --git a/block/snapshot.c b/block/snapshot.c
index e22ac3eac6..b86b5b24ad 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -281,7 +281,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
}
/* .bdrv_open() will re-attach it */
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, fallback);
+ bdrv_graph_wrunlock();
ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
diff --git a/block/stream.c b/block/stream.c
index e522bbdec5..e4da214f1f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -54,6 +54,7 @@ static int stream_prepare(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
+ BlockDriverState *unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs);
BlockDriverState *base;
BlockDriverState *unfiltered_base;
Error *local_err = NULL;
@@ -64,13 +65,18 @@ static int stream_prepare(Job *job)
s->cor_filter_bs = NULL;
/*
- * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
- * already here and use bdrv_set_backing_hd_drained() instead because
- * the polling during drained_begin() might change the graph, and if we do
- * this only later, we may end up working with the wrong base node (or it
- * might even have gone away by the time we want to use it).
+ * bdrv_set_backing_hd() requires that unfiltered_bs and the COW child
+ * of unfiltered_bs are drained. Drain already here and use
+ * bdrv_set_backing_hd_drained() instead because the polling during
+ * drained_begin() might change the graph, and if we do this only later, we
+ * may end up working with the wrong base node (or it might even have gone
+ * away by the time we want to use it).
*/
bdrv_drained_begin(unfiltered_bs);
+ if (unfiltered_bs_cow) {
+ bdrv_ref(unfiltered_bs_cow);
+ bdrv_drained_begin(unfiltered_bs_cow);
+ }
base = bdrv_filter_or_cow_bs(s->above_base);
unfiltered_base = bdrv_skip_filters(base);
@@ -100,6 +106,10 @@ static int stream_prepare(Job *job)
}
out:
+ if (unfiltered_bs_cow) {
+ bdrv_drained_end(unfiltered_bs_cow);
+ bdrv_unref(unfiltered_bs_cow);
+ }
bdrv_drained_end(unfiltered_bs);
return ret;
}
diff --git a/block/vmdk.c b/block/vmdk.c
index 58ce290e9c..e90649c8bf 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -272,6 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
BDRVVmdkState *s = bs->opaque;
VmdkExtent *e;
+ bdrv_graph_wrlock(NULL);
for (i = 0; i < s->num_extents; i++) {
e = &s->extents[i];
g_free(e->l1_table);
@@ -282,6 +283,8 @@ static void vmdk_free_extents(BlockDriverState *bs)
bdrv_unref_child(bs, e->file);
}
}
+ bdrv_graph_wrunlock();
+
g_free(s->extents);
}
@@ -1220,7 +1223,9 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
+ bdrv_graph_wrunlock();
goto out;
}
extent->flat_start_offset = flat_offset << 9;
@@ -1235,20 +1240,26 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
}
g_free(buf);
if (ret) {
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
+ bdrv_graph_wrunlock();
goto out;
}
extent = &s->extents[s->num_extents - 1];
} else if (!strcmp(type, "SESPARSE")) {
ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
if (ret) {
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
+ bdrv_graph_wrunlock();
goto out;
}
extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
+ bdrv_graph_wrlock(NULL);
bdrv_unref_child(bs, extent_file);
+ bdrv_graph_wrunlock();
ret = -ENOTSUP;
goto out;
}
@@ -1309,6 +1320,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
if (ret < 0) {
return ret;
@@ -1770,6 +1783,8 @@ vmdk_co_block_status(BlockDriverState *bs, bool want_zero,
if (extent->flat) {
ret |= BDRV_BLOCK_RECURSE;
}
+ } else {
+ ret |= BDRV_BLOCK_COMPRESSED;
}
*file = extent->file->bs;
break;