diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2023-09-21 09:31:27 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2023-09-21 09:31:28 -0400 |
commit | 3da71a2111de9f0bc475f8292d009265ab34365f (patch) | |
tree | 102ffb4b18cea24d8b75a0d7f22f0eea14d3ba8b /block | |
parent | f2df7e7705e832a8a65422c227e9ef1bdac226c1 (diff) | |
parent | c428b392590df6364a025d5841e3e8a589ebfd4a (diff) |
Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging
Block layer patches
- Graph locking part 4 (node management)
- qemu-img map: report compressed data blocks
- block-backend: process I/O in the current AioContext
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmULHnURHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9aB5hAAqH8To7WIUtg1rj1PY809ck78ghm18PKg
# TNdN7IbrXQghX5foh2VgPwVVl+JaW2CSrJYWQcAO6AbvFduNIi9iKzI6RT0xKXpb
# b8oQXS7zntFzwBv8ohOU5NSVJOgVmNP4h5qJIMmXgB9ZcLFG40zggVH2qQT7guUf
# 9MAc81kI/d5vvSHY0ZjdHjNOgwG4q1j8yytL7OFqWUfB8sXloUCA9lT7w4jIYD8L
# v2StUOLWB01Zts2o8SCNaFxuajs6wUee8b/DM1cyPyLy4KtOdXvLKhq2NlXpLo2i
# aZFr4PtizTVwrQZIJttA9jqM+QCsDOsiSat3BLNNsKUaCWHZB0rOGLCzMCtisyOo
# 4PzuL4UI21ik2zieO1qVM+Thqvw16kHtp6dD9pGk4X4ogGreGYEIxzBl79luR+AV
# NCRizoeFWTHKymS1tSoKrWT9ZNHcLmwemO6Tt1rMYk9jV3T4uY5e1NwxaUavEfsX
# f8dLfQjhNiySOoDknT1OSerBOVdTXURS2ri5H3GZxrxvJ4jOeFkn52C8r3YlZ3Wp
# Cr9LCUJZeXgwY+Q1JQ3D4VLY8aZ83txpw6XKEy0eTEv5wxkBj5LWhXx7hNb5F3lg
# bqaRYijVJn+P82wVxlftIzMfNeVBFHzFE90taPV5grJjr8lgrGBFmD7Puc97kfDX
# oTDBwRxJeew=
# =qTNA
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 20 Sep 2023 12:31:49 EDT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* tag 'for-upstream' of https://repo.or.cz/qemu/kevin: (28 commits)
block: mark aio_poll as non-coroutine
block-backend: process zoned requests in the current AioContext
block-backend: process I/O in the current AioContext
test-bdrv-drain: avoid race with BH in IOThread drain test
block: remove AIOCBInfo->get_aio_context()
qemu-img: map: report compressed data blocks
block: add BDRV_BLOCK_COMPRESSED flag for bdrv_block_status()
block: Mark bdrv_add/del_child() and caller GRAPH_WRLOCK
block: Mark bdrv_unref_child() GRAPH_WRLOCK
block: Mark bdrv_root_unref_child() GRAPH_WRLOCK
block: Take graph rdlock in bdrv_change_aio_context()
block: Take graph rdlock in bdrv_drop_intermediate()
block: Mark bdrv_parent_cb_change_media() GRAPH_RDLOCK
block: Mark bdrv_child_perm() GRAPH_RDLOCK
block: Mark bdrv_get_cumulative_perm() and callers GRAPH_RDLOCK
block: Mark bdrv_parent_perms_conflict() and callers GRAPH_RDLOCK
block: Mark bdrv_attach_child() GRAPH_WRLOCK
block: Call transaction callbacks with lock held
block: Mark bdrv_attach_child_common() GRAPH_WRLOCK
block: Mark bdrv_replace_child_tran() GRAPH_WRLOCK
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/blklogwrites.c | 4 | ||||
-rw-r--r-- | block/blkverify.c | 2 | ||||
-rw-r--r-- | block/block-backend.c | 64 | ||||
-rw-r--r-- | block/copy-before-write.c | 10 | ||||
-rw-r--r-- | block/crypto.c | 6 | ||||
-rw-r--r-- | block/graph-lock.c | 26 | ||||
-rw-r--r-- | block/io.c | 23 | ||||
-rw-r--r-- | block/mirror.c | 8 | ||||
-rw-r--r-- | block/preallocate.c | 133 | ||||
-rw-r--r-- | block/qcow.c | 5 | ||||
-rw-r--r-- | block/qcow2.c | 7 | ||||
-rw-r--r-- | block/quorum.c | 23 | ||||
-rw-r--r-- | block/replication.c | 9 | ||||
-rw-r--r-- | block/snapshot.c | 2 | ||||
-rw-r--r-- | block/stream.c | 20 | ||||
-rw-r--r-- | block/vmdk.c | 15 |
16 files changed, 232 insertions, 125 deletions
diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 3ea7141cb5..a0d70729bb 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -251,7 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; fail_log: if (ret < 0) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->log_file); + bdrv_graph_wrunlock(); s->log_file = NULL; } fail: @@ -263,8 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs) { BDRVBlkLogWritesState *s = bs->opaque; + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->log_file); s->log_file = NULL; + bdrv_graph_wrunlock(); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/blkverify.c b/block/blkverify.c index 7326461f30..dae9716a26 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -151,8 +151,10 @@ static void blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->test_file); s->test_file = NULL; + bdrv_graph_wrunlock(); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/block-backend.c b/block/block-backend.c index 47d360c97a..efe2e7cbf8 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -33,8 +33,6 @@ #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); - typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -103,7 +101,6 @@ typedef struct BlockBackendAIOCB { } BlockBackendAIOCB; static const AIOCBInfo block_backend_aiocb_info = { - .get_aio_context = blk_aiocb_get_aio_context, .aiocb_size = sizeof(BlockBackendAIOCB), }; @@ -121,6 +118,10 @@ static QTAILQ_HEAD(, BlockBackend) block_backends = static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = QTAILQ_HEAD_INITIALIZER(monitor_block_backends); +static int coroutine_mixed_fn GRAPH_RDLOCK +blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp); + static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) @@ -186,7 +187,7 @@ static void blk_vm_state_changed(void *opaque, bool running, RunState state) * * If an error is returned, the VM cannot be allowed to be resumed. */ -static void blk_root_activate(BdrvChild *child, Error **errp) +static void GRAPH_RDLOCK blk_root_activate(BdrvChild *child, Error **errp) { BlockBackend *blk = child->opaque; Error *local_err = NULL; @@ -207,7 +208,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) */ saved_shared_perm = blk->shared_perm; - blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err); + blk_set_perm_locked(blk, blk->perm, BLK_PERM_ALL, &local_err); if (local_err) { error_propagate(errp, local_err); blk->disable_perm = true; @@ -226,7 +227,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp) return; } - blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err); + blk_set_perm_locked(blk, blk->perm, blk->shared_perm, &local_err); if (local_err) { error_propagate(errp, local_err); blk->disable_perm = true; @@ -259,7 +260,7 @@ static bool blk_can_inactivate(BlockBackend *blk) return blk->force_allow_inactivate; } -static int blk_root_inactivate(BdrvChild *child) +static int GRAPH_RDLOCK blk_root_inactivate(BdrvChild *child) { BlockBackend *blk = child->opaque; @@ -911,7 +912,10 @@ void blk_remove_bs(BlockBackend *blk) blk_drain(blk); root = blk->root; blk->root = NULL; + + bdrv_graph_wrlock(NULL); bdrv_root_unref_child(root); + bdrv_graph_wrunlock(); } /* @@ -953,8 +957,9 @@ int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp) /* * Sets the permission bitmasks that the user of the BlockBackend needs. */ -int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, - Error **errp) +static int coroutine_mixed_fn GRAPH_RDLOCK +blk_set_perm_locked(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) { int ret; GLOBAL_STATE_CODE(); @@ -972,6 +977,15 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, return 0; } +int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm, + Error **errp) +{ + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + return blk_set_perm_locked(blk, perm, shared_perm, errp); +} + void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm) { GLOBAL_STATE_CODE(); @@ -1533,7 +1547,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, acb->blk = blk; acb->ret = ret; - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), error_callback_bh, acb); return &acb->common; } @@ -1545,16 +1559,8 @@ typedef struct BlkAioEmAIOCB { bool has_returned; } BlkAioEmAIOCB; -static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_) -{ - BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common); - - return blk_get_aio_context(acb->rwco.blk); -} - static const AIOCBInfo blk_aio_em_aiocb_info = { .aiocb_size = sizeof(BlkAioEmAIOCB), - .get_aio_context = blk_aio_em_aiocb_get_aio_context, }; static void blk_aio_complete(BlkAioEmAIOCB *acb) @@ -1595,11 +1601,11 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, acb->has_returned = false; co = qemu_coroutine_create(co_entry, acb); - aio_co_enter(blk_get_aio_context(blk), co); + aio_co_enter(qemu_get_current_aio_context(), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), blk_aio_complete_bh, acb); } @@ -1901,11 +1907,11 @@ BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset, acb->has_returned = false; co = qemu_coroutine_create(blk_aio_zone_report_entry, acb); - aio_co_enter(blk_get_aio_context(blk), co); + aio_co_enter(qemu_get_current_aio_context(), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), blk_aio_complete_bh, acb); } @@ -1942,11 +1948,11 @@ BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op, acb->has_returned = false; co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb); - aio_co_enter(blk_get_aio_context(blk), co); + aio_co_enter(qemu_get_current_aio_context(), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), blk_aio_complete_bh, acb); } @@ -1982,10 +1988,10 @@ BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset, acb->has_returned = false; co = qemu_coroutine_create(blk_aio_zone_append_entry, acb); - aio_co_enter(blk_get_aio_context(blk), co); + aio_co_enter(qemu_get_current_aio_context(), co); acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - replay_bh_schedule_oneshot_event(blk_get_aio_context(blk), + replay_bh_schedule_oneshot_event(qemu_get_current_aio_context(), blk_aio_complete_bh, acb); } @@ -2434,12 +2440,6 @@ AioContext *blk_get_aio_context(BlockBackend *blk) return blk->ctx; } -static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) -{ - BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb); - return blk_get_aio_context(blk_acb->blk); -} - int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, Error **errp) { diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 9a0e2b69d9..aeaff3bb82 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -341,11 +341,11 @@ static void cbw_refresh_filename(BlockDriverState *bs) bs->file->bs->filename); } -static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) +static void GRAPH_RDLOCK +cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) { if (!(role & BDRV_CHILD_FILTERED)) { /* diff --git a/block/crypto.c b/block/crypto.c index 6ee8d46d30..c9c9a39fa3 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -777,7 +777,7 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp) return spec_info; } -static int +static int GRAPH_RDLOCK block_crypto_amend_prepare(BlockDriverState *bs, Error **errp) { BlockCrypto *crypto = bs->opaque; @@ -793,7 +793,7 @@ block_crypto_amend_prepare(BlockDriverState *bs, Error **errp) return ret; } -static void +static void GRAPH_RDLOCK block_crypto_amend_cleanup(BlockDriverState *bs) { BlockCrypto *crypto = bs->opaque; @@ -841,6 +841,8 @@ block_crypto_amend_options_luks(BlockDriverState *bs, QCryptoBlockAmendOptions *amend_options = NULL; int ret = -EINVAL; + assume_graph_lock(); /* FIXME */ + assert(crypto); assert(crypto->block); diff --git a/block/graph-lock.c b/block/graph-lock.c index f357a2c0b1..58a799065f 100644 --- a/block/graph-lock.c +++ b/block/graph-lock.c @@ -163,17 +163,29 @@ void bdrv_graph_wrlock(BlockDriverState *bs) void bdrv_graph_wrunlock(void) { GLOBAL_STATE_CODE(); - QEMU_LOCK_GUARD(&aio_context_list_lock); assert(qatomic_read(&has_writer)); + WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { + /* + * No need for memory barriers, this works in pair with + * the slow path of rdlock() and both take the lock. + */ + qatomic_store_release(&has_writer, 0); + + /* Wake up all coroutines that are waiting to read the graph */ + qemu_co_enter_all(&reader_queue, &aio_context_list_lock); + } + /* - * No need for memory barriers, this works in pair with - * the slow path of rdlock() and both take the lock. + * Run any BHs that were scheduled during the wrlock section and that + * callers might expect to have finished (in particular, this is important + * for bdrv_schedule_unref()). + * + * Do this only after restarting coroutines so that nested event loops in + * BHs don't deadlock if their condition relies on the coroutine making + * progress. */ - qatomic_store_release(&has_writer, 0); - - /* Wake up all coroutine that are waiting to read the graph */ - qemu_co_enter_all(&reader_queue, &aio_context_list_lock); + aio_bh_poll(qemu_get_aio_context()); } void coroutine_fn bdrv_graph_co_rdlock(void) diff --git a/block/io.c b/block/io.c index ba23a9bcd3..209a6da0c8 100644 --- a/block/io.c +++ b/block/io.c @@ -2950,25 +2950,18 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, /**************************************************************/ /* async I/Os */ +/** + * Synchronously cancels an acb. Must be called with the BQL held and the acb + * must be processed with the BQL held too (IOThreads are not allowed). + * + * Use bdrv_aio_cancel_async() instead when possible. + */ void bdrv_aio_cancel(BlockAIOCB *acb) { - IO_CODE(); + GLOBAL_STATE_CODE(); qemu_aio_ref(acb); bdrv_aio_cancel_async(acb); - while (acb->refcnt > 1) { - if (acb->aiocb_info->get_aio_context) { - aio_poll(acb->aiocb_info->get_aio_context(acb), true); - } else if (acb->bs) { - /* qemu_aio_ref and qemu_aio_unref are not thread-safe, so - * assert that we're not using an I/O thread. Thread-safe - * code should use bdrv_aio_cancel_async exclusively. - */ - assert(bdrv_get_aio_context(acb->bs) == qemu_get_aio_context()); - aio_poll(bdrv_get_aio_context(acb->bs), true); - } else { - abort(); - } - } + AIO_WAIT_WHILE_UNLOCKED(NULL, acb->refcnt > 1); qemu_aio_unref(acb); } diff --git a/block/mirror.c b/block/mirror.c index aae4bebbb6..3cc0757a03 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -702,8 +702,12 @@ static int mirror_exit_common(Job *job) * mirror_top_bs from now on, so keep it drained. */ bdrv_drained_begin(mirror_top_bs); bs_opaque->stop = true; + + bdrv_graph_rdlock_main_loop(); bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, &error_abort); + bdrv_graph_rdunlock_main_loop(); + if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs); @@ -1670,6 +1674,8 @@ static BlockJob *mirror_start_job( uint64_t target_perms, target_shared_perms; int ret; + GLOBAL_STATE_CODE(); + if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); } @@ -1906,8 +1912,10 @@ fail: } bs_opaque->stop = true; + bdrv_graph_rdlock_main_loop(); bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, &error_abort); + bdrv_graph_rdunlock_main_loop(); bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); bdrv_unref(mirror_top_bs); diff --git a/block/preallocate.c b/block/preallocate.c index 3d0f621003..bfb638d8b1 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -75,8 +75,14 @@ typedef struct BDRVPreallocateState { * be invalid (< 0) when we don't have both exclusive BLK_PERM_RESIZE and * BLK_PERM_WRITE permissions on file child. */ + + /* Gives up the resize permission on children when parents don't need it */ + QEMUBH *drop_resize_bh; } BDRVPreallocateState; +static int preallocate_drop_resize(BlockDriverState *bs, Error **errp); +static void preallocate_drop_resize_bh(void *opaque); + #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align" #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size" static QemuOptsList runtime_opts = { @@ -142,6 +148,7 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, * For this to work, mark them invalid. */ s->file_end = s->zero_start = s->data_end = -EINVAL; + s->drop_resize_bh = qemu_bh_new(preallocate_drop_resize_bh, bs); ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { @@ -162,26 +169,42 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, return 0; } -static void preallocate_close(BlockDriverState *bs) +static int preallocate_truncate_to_real_size(BlockDriverState *bs, Error **errp) { - int ret; BDRVPreallocateState *s = bs->opaque; - - if (s->data_end < 0) { - return; - } + int ret; if (s->file_end < 0) { s->file_end = bdrv_getlength(bs->file->bs); if (s->file_end < 0) { - return; + error_setg_errno(errp, -s->file_end, "Failed to get file length"); + return s->file_end; } } if (s->data_end < s->file_end) { ret = bdrv_truncate(bs->file, s->data_end, true, PREALLOC_MODE_OFF, 0, NULL); - s->file_end = ret < 0 ? ret : s->data_end; + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to drop preallocation"); + s->file_end = ret; + return ret; + } + s->file_end = s->data_end; + } + + return 0; +} + +static void preallocate_close(BlockDriverState *bs) +{ + BDRVPreallocateState *s = bs->opaque; + + qemu_bh_cancel(s->drop_resize_bh); + qemu_bh_delete(s->drop_resize_bh); + + if (s->data_end >= 0) { + preallocate_truncate_to_real_size(bs, NULL); } } @@ -198,6 +221,7 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { PreallocateOpts *opts = g_new0(PreallocateOpts, 1); + int ret; if (!preallocate_absorb_opts(opts, reopen_state->options, reopen_state->bs->file->bs, errp)) { @@ -205,6 +229,19 @@ static int preallocate_reopen_prepare(BDRVReopenState *reopen_state, return -EINVAL; } + /* + * Drop the preallocation already here if reopening read-only. The child + * might also be reopened read-only and then scheduling a BH during the + * permission update is too late. + */ + if ((reopen_state->flags & BDRV_O_RDWR) == 0) { + ret = preallocate_drop_resize(reopen_state->bs, errp); + if (ret < 0) { + g_free(opts); + return ret; + } + } + reopen_state->opaque = opts; return 0; @@ -462,58 +499,61 @@ preallocate_co_getlength(BlockDriverState *bs) return ret; } -static int preallocate_check_perm(BlockDriverState *bs, - uint64_t perm, uint64_t shared, Error **errp) +static int preallocate_drop_resize(BlockDriverState *bs, Error **errp) { BDRVPreallocateState *s = bs->opaque; + int ret; - if (s->data_end >= 0 && !can_write_resize(perm)) { - /* - * Lose permissions. - * We should truncate in check_perm, as in set_perm bs->file->perm will - * be already changed, and we should not violate it. - */ - if (s->file_end < 0) { - s->file_end = bdrv_getlength(bs->file->bs); - if (s->file_end < 0) { - error_setg(errp, "Failed to get file length"); - return s->file_end; - } - } + if (s->data_end < 0) { + return 0; + } - if (s->data_end < s->file_end) { - int ret = bdrv_truncate(bs->file, s->data_end, true, - PREALLOC_MODE_OFF, 0, NULL); - if (ret < 0) { - error_setg(errp, "Failed to drop preallocation"); - s->file_end = ret; - return ret; - } - s->file_end = s->data_end; - } + /* + * Before switching children to be read-only, truncate them to remove + * the preallocation and let them have the real size. + */ + ret = preallocate_truncate_to_real_size(bs, errp); + if (ret < 0) { + return ret; } + /* + * We'll drop our permissions and will allow other users to take write and + * resize permissions (see preallocate_child_perm). Anyone will be able to + * change the child, so mark all states invalid. We'll regain control if a + * parent requests write access again. + */ + s->data_end = s->file_end = s->zero_start = -EINVAL; + + bdrv_graph_rdlock_main_loop(); + bdrv_child_refresh_perms(bs, bs->file, NULL); + bdrv_graph_rdunlock_main_loop(); + return 0; } +static void preallocate_drop_resize_bh(void *opaque) +{ + /* + * In case of errors, we'll simply keep the exclusive lock on the image + * indefinitely. + */ + preallocate_drop_resize(opaque, NULL); +} + static void preallocate_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared) { BDRVPreallocateState *s = bs->opaque; if (can_write_resize(perm)) { + qemu_bh_cancel(s->drop_resize_bh); if (s->data_end < 0) { s->data_end = s->file_end = s->zero_start = - bdrv_getlength(bs->file->bs); + bs->file->bs->total_sectors * BDRV_SECTOR_SIZE; } } else { - /* - * We drop our permissions, as well as allow shared - * permissions (see preallocate_child_perm), anyone will be able to - * change the child, so mark all states invalid. We'll regain control if - * get good permissions back. - */ - s->data_end = s->file_end = s->zero_start = -EINVAL; + qemu_bh_schedule(s->drop_resize_bh); } } @@ -521,10 +561,16 @@ static void preallocate_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { + BDRVPreallocateState *s = bs->opaque; + bdrv_default_perms(bs, c, role, reopen_queue, perm, shared, nperm, nshared); - if (can_write_resize(perm)) { - /* This should come by default, but let's enforce: */ + /* + * We need exclusive write and resize permissions on the child not only when + * the parent can write to it, but also after the parent gave up write + * permissions until preallocate_drop_resize() has completed. + */ + if (can_write_resize(perm) || s->data_end != -EINVAL) { *nperm |= BLK_PERM_WRITE | BLK_PERM_RESIZE; /* @@ -554,7 +600,6 @@ static BlockDriver bdrv_preallocate_filter = { .bdrv_co_flush = preallocate_co_flush, .bdrv_co_truncate = preallocate_co_truncate, - .bdrv_check_perm = preallocate_check_perm, .bdrv_set_perm = preallocate_set_perm, .bdrv_child_perm = preallocate_child_perm, diff --git a/block/qcow.c b/block/qcow.c index 577bd70324..d56d24ab6d 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -549,7 +549,10 @@ qcow_co_block_status(BlockDriverState *bs, bool want_zero, if (!cluster_offset) { return 0; } - if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypto) { + if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + return BDRV_BLOCK_DATA | BDRV_BLOCK_COMPRESSED; + } + if (s->crypto) { return BDRV_BLOCK_DATA; } *map = cluster_offset | index_in_cluster; diff --git a/block/qcow2.c b/block/qcow2.c index b48cd9ce63..af43d59d76 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1880,7 +1880,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { bdrv_graph_co_rdunlock(); - bdrv_unref_child(bs, s->data_file); + bdrv_co_unref_child(bs, s->data_file); bdrv_graph_co_rdlock(); s->data_file = NULL; } @@ -2162,6 +2162,9 @@ qcow2_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset, { status |= BDRV_BLOCK_RECURSE; } + if (type == QCOW2_SUBCLUSTER_COMPRESSED) { + status |= BDRV_BLOCK_COMPRESSED; + } return status; } @@ -2790,7 +2793,9 @@ static void qcow2_do_close(BlockDriverState *bs, bool close_data_file) g_free(s->image_backing_format); if (close_data_file && has_data_file(bs)) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->data_file); + bdrv_graph_wrunlock(); s->data_file = NULL; } diff --git a/block/quorum.c b/block/quorum.c index f28758cf2b..05220cab7f 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1037,12 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, close_exit: /* cleanup on error */ + bdrv_graph_wrlock(NULL); for (i = 0; i < s->num_children; i++) { if (!opened[i]) { continue; } bdrv_unref_child(bs, s->children[i]); } + bdrv_graph_wrunlock(); g_free(s->children); g_free(opened); exit: @@ -1055,15 +1057,17 @@ static void quorum_close(BlockDriverState *bs) BDRVQuorumState *s = bs->opaque; int i; + bdrv_graph_wrlock(NULL); for (i = 0; i < s->num_children; i++) { bdrv_unref_child(bs, s->children[i]); } + bdrv_graph_wrunlock(); g_free(s->children); } -static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, - Error **errp) +static void GRAPH_WRLOCK +quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, Error **errp) { BDRVQuorumState *s = bs->opaque; BdrvChild *child; @@ -1089,8 +1093,6 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, } s->next_child_index++; - bdrv_drained_begin(bs); - /* We can safely add the child now */ bdrv_ref(child_bs); @@ -1098,18 +1100,15 @@ static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, BDRV_CHILD_DATA, errp); if (child == NULL) { s->next_child_index--; - goto out; + return; } s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); s->children[s->num_children++] = child; quorum_refresh_flags(bs); - -out: - bdrv_drained_end(bs); } -static void quorum_del_child(BlockDriverState *bs, BdrvChild *child, - Error **errp) +static void GRAPH_WRLOCK +quorum_del_child(BlockDriverState *bs, BdrvChild *child, Error **errp) { BDRVQuorumState *s = bs->opaque; char indexstr[INDEXSTR_LEN]; @@ -1139,16 +1138,14 @@ static void quorum_del_child(BlockDriverState *bs, BdrvChild *child, s->next_child_index--; } - bdrv_drained_begin(bs); - /* We can safely remove this child now */ memmove(&s->children[i], &s->children[i + 1], (s->num_children - i - 1) * sizeof(BdrvChild *)); s->children = g_renew(BdrvChild *, s->children, --s->num_children); + bdrv_unref_child(bs, child); quorum_refresh_flags(bs); - bdrv_drained_end(bs); } static void quorum_gather_child_options(BlockDriverState *bs, QDict *target, diff --git a/block/replication.c b/block/replication.c index ea4bf1aa80..dd166d2d82 100644 --- a/block/replication.c +++ b/block/replication.c @@ -542,12 +542,15 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_graph_wrlock(bs); + bdrv_ref(hidden_disk->bs); s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", &child_of_bds, BDRV_CHILD_DATA, &local_err); if (local_err) { error_propagate(errp, local_err); + bdrv_graph_wrunlock(); aio_context_release(aio_context); return; } @@ -558,10 +561,13 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, BDRV_CHILD_DATA, &local_err); if (local_err) { error_propagate(errp, local_err); + bdrv_graph_wrunlock(); aio_context_release(aio_context); return; } + bdrv_graph_wrunlock(); + /* start backup job now */ error_setg(&s->blocker, "Block device is in use by internal backup job"); @@ -666,10 +672,13 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; + bdrv_graph_wrunlock(); + s->error = 0; } else { s->stage = BLOCK_REPLICATION_FAILOVER_FAILED; diff --git a/block/snapshot.c b/block/snapshot.c index e22ac3eac6..b86b5b24ad 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -281,7 +281,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, fallback); + bdrv_graph_wrunlock(); ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); diff --git a/block/stream.c b/block/stream.c index e522bbdec5..e4da214f1f 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,6 +54,7 @@ static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs); + BlockDriverState *unfiltered_bs_cow = bdrv_cow_bs(unfiltered_bs); BlockDriverState *base; BlockDriverState *unfiltered_base; Error *local_err = NULL; @@ -64,13 +65,18 @@ static int stream_prepare(Job *job) s->cor_filter_bs = NULL; /* - * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain - * already here and use bdrv_set_backing_hd_drained() instead because - * the polling during drained_begin() might change the graph, and if we do - * this only later, we may end up working with the wrong base node (or it - * might even have gone away by the time we want to use it). + * bdrv_set_backing_hd() requires that the unfiltered_bs and the COW child + * of unfiltered_bs is drained. Drain already here and use + * bdrv_set_backing_hd_drained() instead because the polling during + * drained_begin() might change the graph, and if we do this only later, we + * may end up working with the wrong base node (or it might even have gone + * away by the time we want to use it). */ bdrv_drained_begin(unfiltered_bs); + if (unfiltered_bs_cow) { + bdrv_ref(unfiltered_bs_cow); + bdrv_drained_begin(unfiltered_bs_cow); + } base = bdrv_filter_or_cow_bs(s->above_base); unfiltered_base = bdrv_skip_filters(base); @@ -100,6 +106,10 @@ static int stream_prepare(Job *job) } out: + if (unfiltered_bs_cow) { + bdrv_drained_end(unfiltered_bs_cow); + bdrv_unref(unfiltered_bs_cow); + } bdrv_drained_end(unfiltered_bs); return ret; } diff --git a/block/vmdk.c b/block/vmdk.c index 58ce290e9c..e90649c8bf 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -272,6 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; VmdkExtent *e; + bdrv_graph_wrlock(NULL); for (i = 0; i < s->num_extents; i++) { e = &s->extents[i]; g_free(e->l1_table); @@ -282,6 +283,8 @@ static void vmdk_free_extents(BlockDriverState *bs) bdrv_unref_child(bs, e->file); } } + bdrv_graph_wrunlock(); + g_free(s->extents); } @@ -1220,7 +1223,9 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, ret = vmdk_add_extent(bs, extent_file, true, sectors, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); + bdrv_graph_wrunlock(); goto out; } extent->flat_start_offset = flat_offset << 9; @@ -1235,20 +1240,26 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } g_free(buf); if (ret) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); + bdrv_graph_wrunlock(); goto out; } extent = &s->extents[s->num_extents - 1]; } else if (!strcmp(type, "SESPARSE")) { ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); if (ret) { + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); + bdrv_graph_wrunlock(); goto out; } extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); + bdrv_graph_wrlock(NULL); bdrv_unref_child(bs, extent_file); + bdrv_graph_wrunlock(); ret = -ENOTSUP; goto out; } @@ -1309,6 +1320,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, BDRVVmdkState *s = bs->opaque; uint32_t magic; + GRAPH_RDLOCK_GUARD_MAINLOOP(); + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; @@ -1770,6 +1783,8 @@ vmdk_co_block_status(BlockDriverState *bs, bool want_zero, if (extent->flat) { ret |= BDRV_BLOCK_RECURSE; } + } else { + ret |= BDRV_BLOCK_COMPRESSED; } *file = extent->file->bs; break; |