From 5e8ac21717373cbe96ef7a91e216bf5788815d63 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:58 +0100 Subject: block: Revert .bdrv_drained_begin/end to non-coroutine_fn Polling during bdrv_drained_end() can be problematic (and in the future, we may get cases for bdrv_drained_begin() where polling is forbidden, and we don't care about already in-flight requests, but just want to prevent new requests from arriving). The .bdrv_drained_begin/end callbacks running in a coroutine is the only reason why we have to do this polling, so make them non-coroutine callbacks again. None of the callers actually yield any more. This means that bdrv_drained_end() effectively doesn't poll any more, even if AIO_WAIT_WHILE() loops are still there (their condition is false from the beginning). This is generally not a problem, but in test-bdrv-drain, some additional explicit aio_poll() calls need to be added because the test case wants to verify the final state after BHs have executed. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-4-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block_int-common.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 31ae91e56e..40d646d1ed 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -735,17 +735,19 @@ struct BlockDriver { void (*bdrv_io_unplug)(BlockDriverState *bs); /** - * bdrv_co_drain_begin is called if implemented in the beginning of a + * bdrv_drain_begin is called if implemented in the beginning of a * drain operation to drain and stop any internal sources of requests in * the driver. - * bdrv_co_drain_end is called if implemented at the end of the drain. + * bdrv_drain_end is called if implemented at the end of the drain. * * They should be used by the driver to e.g. manage scheduled I/O * requests, or toggle an internal state. After the end of the drain new * requests will continue normally. + * + * Implementations of both functions must not call aio_poll(). */ - void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); - void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); + void (*bdrv_drain_begin)(BlockDriverState *bs); + void (*bdrv_drain_end)(BlockDriverState *bs); bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( -- cgit v1.2.3 From 2f65df6e16dea2d6e7212fa675f4779d9281e26f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:40:59 +0100 Subject: block: Remove drained_end_counter drained_end_counter is unused now, nobody changes its value any more. It can be removed. In cases where we had two almost identical functions that only differed in whether the caller passes drained_end_counter, or whether they would poll for a local drained_end_counter to reach 0, these become a single function. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Emanuele Giuseppe Esposito Message-Id: <20221118174110.55183-5-kwolf@redhat.com> Reviewed-by: Hanna Reitz Signed-off-by: Kevin Wolf --- include/block/block-io.h | 24 ------------------------ include/block/block_int-common.h | 6 +----- 2 files changed, 1 insertion(+), 29 deletions(-) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index b099d7db45..054e964c9b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); -/** - * bdrv_drained_end_no_poll: - * - * Same as bdrv_drained_end(), but do not poll for the subgraph to - * actually become unquiesced. Therefore, no graph changes will occur - * with this function. - * - * *drained_end_counter is incremented for every background operation - * that is scheduled, and will be decremented for every operation once - * it settles. The caller must poll until it reaches 0. The counter - * should be accessed using atomic operations only. - */ -void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); - - /* * "I/O or GS" API functions. These functions can run without * the BQL, but only in one specific iothread/main loop. @@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); * bdrv_parent_drained_end_single: * * End a quiesced section for the parent of @c. - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled, which may result in graph changes. */ void bdrv_parent_drained_end_single(BdrvChild *c); @@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). - * - * This polls @bs's AioContext until all scheduled sub-drained_ends - * have settled. On one hand, that may result in graph changes. On - * the other, this requires that the caller either runs in the main - * loop; or that all involved nodes (@bs and all of its parents) are - * in the caller's AioContext. */ void bdrv_drained_end(BlockDriverState *bs); diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 40d646d1ed..2b97576f6d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -939,15 +939,11 @@ struct BdrvChildClass { * These functions must not change the graph (and therefore also must not * call aio_poll(), which could change the graph indirectly). * - * If drained_end() schedules background operations, it must atomically - * increment *drained_end_counter for each such operation and atomically - * decrement it once the operation has settled. - * * Note that this can be nested. If drained_begin() was called twice, new * I/O is allowed only after drained_end() was called twice, too. */ void (*drained_begin)(BdrvChild *child); - void (*drained_end)(BdrvChild *child, int *drained_end_counter); + void (*drained_end)(BdrvChild *child); /* * Returns whether the parent has pending requests for the child. This -- cgit v1.2.3 From 92140b9f3f07d80e2c27edcc6e32f392be2135e6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:04 +0100 Subject: stream: Replace subtree drain with a single node drain The subtree drain was introduced in commit b1e1af394d9 as a way to avoid graph changes between finding the base node and changing the block graph as necessary on completion of the image streaming job. The block graph could change between these two points because bdrv_set_backing_hd() first drains the parent node, which involved polling and can do anything. Subtree draining was an imperfect way to make this less likely (because with it, fewer callbacks are called during this window). Everyone agreed that it's not really the right solution, and it was only committed as a stopgap solution. This replaces the subtree drain with a solution that simply drains the parent node before we try to find the base node, and then call a version of bdrv_set_backing_hd() that doesn't drain, but just asserts that the parent node is already drained. This way, any graph changes caused by draining happen before we start looking at the graph and things stay consistent between finding the base node and changing the graph. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-10-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-global-state.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index c7bd4a2088..00e0cf8aea 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp); +int bdrv_set_backing_hd_drained(BlockDriverState *bs, + BlockDriverState *backing_hd, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, const char *bdref_key, Error **errp); BlockDriverState *bdrv_open(const char *filename, const char *reference, -- cgit v1.2.3 From 299403aedaeb7f08d8e98aa8614b29d4e5546066 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:05 +0100 Subject: block: Remove subtree drains Subtree drains are not used any more. Remove them. After this, BdrvChildClass.attach/detach() don't poll any more. Signed-off-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-11-kwolf@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-io.h | 18 +++--------------- include/block/block_int-common.h | 1 - include/block/block_int-io.h | 12 ------------ 3 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 054e964c9b..9c36a16a1f 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); /** * bdrv_drain_poll: * - * Poll for pending requests in @bs, its parents (except for @ignore_parent), - * and if @recursive is true its children as well (used for subtree drain). + * Poll for pending requests in @bs and its parents (except for @ignore_parent). * * If @ignore_bds_parents is true, parents that are BlockDriverStates must * ignore the drain request because they will be drained separately (used for @@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); * * This is part of bdrv_drained_begin. */ -bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, - BdrvChild *ignore_parent, bool ignore_bds_parents); +bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, + bool ignore_bds_parents); /** * bdrv_drained_begin: @@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent, bool ignore_bds_parents); -/** - * Like bdrv_drained_begin, but recursively begins a quiesced section for - * exclusive access to all child nodes as well. - */ -void bdrv_subtree_drained_begin(BlockDriverState *bs); - /** * bdrv_drained_end: * @@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); */ void bdrv_drained_end(BlockDriverState *bs); -/** - * End a quiescent section started by bdrv_subtree_drained_begin(). - */ -void bdrv_subtree_drained_end(BlockDriverState *bs); - #endif /* BLOCK_IO_H */ diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2b97576f6d..791dddfd7d 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -1184,7 +1184,6 @@ struct BlockDriverState { /* Accessed with atomic ops. */ int quiesce_counter; - int recursive_quiesce_counter; unsigned int write_gen; /* Current data generation */ diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4b0b3e17ef..8bc061ebb8 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, */ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); - -/* - * "I/O or GS" API functions. These functions can run without - * the BQL, but only in one specific iothread/main loop. - * - * See include/block/block-io.h for more information about - * the "I/O or GS" API. - */ - -void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); -void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); - #endif /* BLOCK_INT_IO_H */ -- cgit v1.2.3 From 57e05be343f33f4e5899a8d8946a8596d68424a1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:06 +0100 Subject: block: Call drain callbacks only once We only need to call both the BlockDriver's callback and the parent callbacks when going from undrained to drained or vice versa. A second drain section doesn't make a difference for the driver or the parent, they weren't supposed to send new requests before and after the second drain. One thing that gets in the way is the 'ignore_bds_parents' parameter in bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that bdrv_drain_all_begin() increases bs->quiesce_counter, but does not quiesce the parent through BdrvChildClass callbacks. If an additional drain section is started now, bs->quiesce_counter will be non-zero, but we would still need to quiesce the parent through BdrvChildClass in order to keep things consistent (and unquiesce it on the matching bdrv_drained_end(), even though the counter would not reach 0 yet as long as the bdrv_drain_all() section is still active). Instead of keeping track of this, let's just get rid of the parameter. It was introduced in commit 6cd5c9d7b2d as an optimisation so that during bdrv_drain_all(), we wouldn't recursively drain all parents up to the root for each node, resulting in quadratic complexity. As it happens, calling the callbacks only once solves the same problem, so as of this patch, we'll still have O(n) complexity and ignore_bds_parents is not needed any more. This patch only ignores the 'ignore_bds_parents' parameter. It will be removed in a separate patch. Signed-off-by: Kevin Wolf Reviewed-by: Hanna Reitz Message-Id: <20221118174110.55183-12-kwolf@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- include/block/block_int-common.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 791dddfd7d..a6bc6b7fe9 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -980,13 +980,13 @@ struct BdrvChild { bool frozen; /* - * How many times the parent of this child has been drained + * True if the parent of this child has been drained by this BdrvChild * (through klass->drained_*). - * Usually, this is equal to bs->quiesce_counter (potentially - * reduced by bdrv_drain_all_count). It may differ while the + * + * It is generally true if bs->quiesce_counter > 0. It may differ while the * child is entering or leaving a drained section. */ - int parent_quiesce_counter; + bool quiesced_parent; QLIST_ENTRY(BdrvChild) next; QLIST_ENTRY(BdrvChild) next_parent; -- cgit v1.2.3 From a82a3bd135078d14f1bb4b5e50f51e77d3748270 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:07 +0100 Subject: block: Remove ignore_bds_parents parameter from drain_begin/end. ignore_bds_parents is now ignored during drain_begin and drain_end, so we can just remove it there. It is still a valid optimisation for drain_all in bdrv_drained_poll(), so leave it around there. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-13-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- include/block/block-io.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 9c36a16a1f..8f5e75756a 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already * running requests to complete. */ -void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, - BdrvChild *parent, bool ignore_bds_parents); +void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); /** * bdrv_drained_end: -- cgit v1.2.3 From 23987471285a26397e3152a9244b652445fd36c4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:09 +0100 Subject: block: Don't poll in bdrv_replace_child_noperm() In order to make sure that bdrv_replace_child_noperm() doesn't have to poll any more, get rid of the bdrv_parent_drained_begin_single() call. This is possible now because we can require that the parent is already drained through the child in question when the function is called and we don't call the parent drain callbacks more than once. The additional drain calls needed in callers cause the test case to run its code in the drain handler too early (bdrv_attach_child() drains now), so modify it to only enable the code after the test setup has completed. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-15-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- include/block/block-io.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 8f5e75756a..65e6d2569b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); */ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +/** + * bdrv_parent_drained_poll_single: + * + * Returns true if there is any pending activity to cease before @c can be + * called quiesced, false otherwise. + */ +bool bdrv_parent_drained_poll_single(BdrvChild *c); + /** * bdrv_parent_drained_end_single: * -- cgit v1.2.3 From 606ed756c1d69cba4822be8923248d2fd714f069 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 18 Nov 2022 18:41:10 +0100 Subject: block: Remove poll parameter from bdrv_parent_drained_begin_single() All callers of bdrv_parent_drained_begin_single() pass poll=false now, so we don't need the parameter any more. Signed-off-by: Kevin Wolf Message-Id: <20221118174110.55183-16-kwolf@redhat.com> Reviewed-by: Hanna Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- include/block/block-io.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 65e6d2569b..92aaa7c1e9 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** * bdrv_parent_drained_begin_single: * - * Begin a quiesced section for the parent of @c. If @poll is true, wait for - * any pending activity to cease. + * Begin a quiesced section for the parent of @c. */ -void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); +void bdrv_parent_drained_begin_single(BdrvChild *c); /** * bdrv_parent_drained_poll_single: -- cgit v1.2.3 From 7b52a921c12c01be3b2ce331081dd9accea99948 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:24 -0500 Subject: block-io: introduce coroutine_fn duplicates for bdrv_common_block_status_above callers bdrv_common_block_status_above() is a g_c_w, and it is being called by many "wrapper" functions like bdrv_is_allocated(), bdrv_is_allocated_above() and bdrv_block_status_above(). Because we want to eventually split the coroutine from non-coroutine case in g_c_w, create duplicate wrappers that take care of directly calling the same coroutine functions called in the g_c_w. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-2-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-io.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 92aaa7c1e9..72919254cd 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -94,14 +94,29 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); + +int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t offset, int64_t bytes, int64_t *pnum, int64_t *map, BlockDriverState **file); + +int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset, + int64_t bytes, int64_t *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes, int64_t *pnum); + +int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, bool include_base, int64_t offset, int64_t bytes, int64_t *pnum); + int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset, int64_t bytes); -- cgit v1.2.3 From 43a0d4f08b7a7bae90c0753db2b49441ef3e7f6e Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:25 -0500 Subject: block-copy: add coroutine_fn annotations These functions end up calling bdrv_common_block_status_above(), a generated_co_wrapper function. In addition, they also happen to be always called in coroutine context, meaning all callers are coroutine_fn. This means that the g_c_w function will enter the qemu_in_coroutine() case and eventually suspend (or in other words call qemu_coroutine_yield()). Therefore we can mark such functions coroutine_fn too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Paolo Bonzini Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-3-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-copy.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/block/block-copy.h b/include/block/block-copy.h index ba0b425d78..8cea4f9b90 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -36,8 +36,9 @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm); void block_copy_state_free(BlockCopyState *s); void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes); -int64_t block_copy_reset_unallocated(BlockCopyState *s, - int64_t offset, int64_t *count); +int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s, + int64_t offset, + int64_t *count); int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, bool ignore_ratelimit, uint64_t timeout_ns, -- cgit v1.2.3 From ff7e261bb91ecf44df12a551582187ddf6b187fe Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:27 -0500 Subject: block-backend: replace bdrv_*_above with blk_*_above Avoid mixing bdrv_* functions with blk_*, so create blk_* counterparts for bdrv_block_status_above and bdrv_is_allocated_above. Note that since blk_co_block_status_above only calls the g_c_w function bdrv_common_block_status_above and is marked as coroutine_fn, call directly bdrv_co_common_block_status_above() to avoid using a g_c_w. Same applies to blk_co_is_allocated_above. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-5-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/sysemu/block-backend-io.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index 50f5aa2e07..ee3eb12610 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -92,6 +92,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, int64_t bytes, BdrvRequestFlags read_flags, BdrvRequestFlags write_flags); +int coroutine_fn blk_co_block_status_above(BlockBackend *blk, + BlockDriverState *base, + int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file); +int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, + BlockDriverState *base, + bool include_base, int64_t offset, + int64_t bytes, int64_t *pnum); /* * "I/O or GS" API functions. These functions can run without -- cgit v1.2.3 From 2475a0d0f4b0b450f25f7672c7072b4fdae6df00 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:31 -0500 Subject: block: bdrv_create_file is a coroutine_fn It is always called in coroutine_fn callbacks, therefore it can directly call bdrv_co_create(). Rename it to bdrv_co_create_file too. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-9-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-global-state.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 00e0cf8aea..387a7cbb2e 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -57,7 +57,8 @@ BlockDriver *bdrv_find_protocol(const char *filename, BlockDriver *bdrv_find_format(const char *format_name); int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); -int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); +int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, + Error **errp); BlockDriverState *bdrv_new(void); int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, -- cgit v1.2.3 From 1bd542016cc2c132e3d52dbc9e663966dfc10e72 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:32 -0500 Subject: block: rename generated_co_wrapper in co_wrapper_mixed In preparation to the incoming new function specifiers, rename g_c_w with a more meaningful name and document it. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-10-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block-common.h | 11 ++++--- include/block/block-io.h | 44 ++++++++++++------------- include/sysemu/block-backend-io.h | 68 +++++++++++++++++++-------------------- 3 files changed, 63 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/include/block/block-common.h b/include/block/block-common.h index 297704c1e9..ec2309055b 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -35,14 +35,17 @@ #include "qemu/transactions.h" /* - * generated_co_wrapper + * co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py * - * Function specifier, which does nothing but mark functions to be + * Function specifiers, which do nothing but mark functions to be * generated by scripts/block-coroutine-wrapper.py * - * Read more in docs/devel/block-coroutine-wrapper.rst + * Usage: read docs/devel/block-coroutine-wrapper.rst + * + * co_wrapper_mixed functions can be called by both coroutine and + * non-coroutine context. */ -#define generated_co_wrapper +#define co_wrapper_mixed /* block.c */ typedef struct BlockDriver BlockDriver; diff --git a/include/block/block-io.h b/include/block/block-io.h index 72919254cd..72cf45975b 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -39,19 +39,19 @@ * to catch when they are accidentally called by the wrong API. */ -int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); -int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite(BdrvChild *child, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); +int co_wrapper_mixed bdrv_pwrite_sync(BdrvChild *child, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); @@ -281,22 +281,22 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, void bdrv_drain(BlockDriverState *bs); -int generated_co_wrapper +int co_wrapper_mixed bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); +int co_wrapper_mixed bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix); /* Invalidate any cached metadata used by image formats */ -int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -int generated_co_wrapper bdrv_flush(BlockDriverState *bs); -int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int generated_co_wrapper +int co_wrapper_mixed bdrv_invalidate_cache(BlockDriverState *bs, + Error **errp); +int co_wrapper_mixed bdrv_flush(BlockDriverState *bs); +int co_wrapper_mixed bdrv_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes); +int co_wrapper_mixed bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int generated_co_wrapper +int co_wrapper_mixed bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h index ee3eb12610..7ec6d978d4 100644 --- a/include/sysemu/block-backend-io.h +++ b/include/sysemu/block-backend-io.h @@ -110,77 +110,77 @@ int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk, * the "I/O or GS" API. */ -int generated_co_wrapper blk_pread(BlockBackend *blk, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pread(BlockBackend *blk, int64_t offset, + int64_t bytes, void *buf, + BdrvRequestFlags flags); int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags); -int generated_co_wrapper blk_preadv(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_preadv(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper blk_preadv_part(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_preadv_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); int coroutine_fn blk_co_preadv_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwrite(BlockBackend *blk, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwrite(BlockBackend *blk, int64_t offset, + int64_t bytes, const void *buf, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwritev(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwritev(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwritev_part(BlockBackend *blk, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - size_t qiov_offset, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwritev_part(BlockBackend *blk, int64_t offset, + int64_t bytes, QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); -int generated_co_wrapper blk_pwrite_compressed(BlockBackend *blk, - int64_t offset, int64_t bytes, - const void *buf); +int co_wrapper_mixed blk_pwrite_compressed(BlockBackend *blk, + int64_t offset, int64_t bytes, + const void *buf); int coroutine_fn blk_co_pwrite_compressed(BlockBackend *blk, int64_t offset, int64_t bytes, const void *buf); -int generated_co_wrapper blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, + int64_t bytes, + BdrvRequestFlags flags); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int64_t bytes, BdrvRequestFlags flags); -int generated_co_wrapper blk_pdiscard(BlockBackend *blk, int64_t offset, - int64_t bytes); +int co_wrapper_mixed blk_pdiscard(BlockBackend *blk, int64_t offset, + int64_t bytes); int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes); -int generated_co_wrapper blk_flush(BlockBackend *blk); +int co_wrapper_mixed blk_flush(BlockBackend *blk); int coroutine_fn blk_co_flush(BlockBackend *blk); -int generated_co_wrapper blk_ioctl(BlockBackend *blk, unsigned long int req, - void *buf); +int co_wrapper_mixed blk_ioctl(BlockBackend *blk, unsigned long int req, + void *buf); int coroutine_fn blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); -int generated_co_wrapper blk_truncate(BlockBackend *blk, int64_t offset, - bool exact, PreallocMode prealloc, - BdrvRequestFlags flags, Error **errp); +int co_wrapper_mixed blk_truncate(BlockBackend *blk, int64_t offset, + bool exact, PreallocMode prealloc, + BdrvRequestFlags flags, Error **errp); int coroutine_fn blk_co_truncate(BlockBackend *blk, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -- cgit v1.2.3 From 76a2f554c1e7b8acb332f765034fdf0ab3525202 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:33 -0500 Subject: block-coroutine-wrapper.py: introduce co_wrapper This new annotation starts just a function wrapper that creates a new coroutine. It assumes the caller is not a coroutine. It will be the default annotation to be used in the future. This is much better as c_w_mixed, because it is clear if the caller is a coroutine or not, and provides the advantage of automating the code creation. In the future all c_w_mixed functions will be substituted by co_wrapper. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-11-eesposit@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block-common.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/block/block-common.h b/include/block/block-common.h index ec2309055b..847e4d4626 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -42,9 +42,13 @@ * * Usage: read docs/devel/block-coroutine-wrapper.rst * - * co_wrapper_mixed functions can be called by both coroutine and - * non-coroutine context. + * There are 2 kind of specifiers: + * - co_wrapper functions can be called by only non-coroutine context, because + * they always generate a new coroutine. + * - co_wrapper_mixed functions can be called by both coroutine and + * non-coroutine context. */ +#define co_wrapper #define co_wrapper_mixed /* block.c */ -- cgit v1.2.3 From 741443eb4301eb130dab812c7ae7cfd71a68a679 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:36 -0500 Subject: block: convert bdrv_create to co_wrapper This function is never called in coroutine context, therefore instead of manually creating a new coroutine, delegate it to the block-coroutine-wrapper script, defining it as co_wrapper. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-14-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-global-state.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 387a7cbb2e..1f8b54f2df 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -55,8 +55,12 @@ BlockDriver *bdrv_find_protocol(const char *filename, bool allow_protocol_prefix, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -int bdrv_create(BlockDriver *drv, const char* filename, - QemuOpts *opts, Error **errp); + +int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); +int co_wrapper bdrv_create(BlockDriver *drv, const char *filename, + QemuOpts *opts, Error **errp); + int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts, Error **errp); -- cgit v1.2.3 From 0508d0be4b547be8da97c95ea9e1f433077dd2ec Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Mon, 28 Nov 2022 09:23:37 -0500 Subject: block/dirty-bitmap: convert coroutine-only functions to co_wrapper bdrv_can_store_new_dirty_bitmap and bdrv_remove_persistent_dirty_bitmap check if they are running in a coroutine, directly calling the coroutine callback if it's the case. Except that no coroutine calls such functions, therefore that check can be removed, and function creation can be offloaded to c_w. Signed-off-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20221128142337.657646-15-eesposit@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block-common.h | 5 +++-- include/block/block-io.h | 10 ++++++++-- include/block/dirty-bitmap.h | 10 ++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/block/block-common.h b/include/block/block-common.h index 847e4d4626..6cf603ab06 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -29,8 +29,6 @@ #include "qemu/iov.h" #include "qemu/coroutine.h" #include "block/accounting.h" -#include "block/dirty-bitmap.h" -#include "block/blockjob.h" #include "qemu/hbitmap.h" #include "qemu/transactions.h" @@ -51,6 +49,9 @@ #define co_wrapper #define co_wrapper_mixed +#include "block/dirty-bitmap.h" +#include "block/blockjob.h" + /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BdrvChild BdrvChild; diff --git a/include/block/block-io.h b/include/block/block-io.h index 72cf45975b..52869ea08e 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -215,8 +215,14 @@ AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c); void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); -bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name, - uint32_t granularity, Error **errp); +bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); +bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, + const char *name, + uint32_t granularity, + Error **errp); /** * diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 6528336c4c..c3700cec76 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -34,8 +34,14 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, uint32_t flags, Error **errp); void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); -int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name, - Error **errp); + +int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); +int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, + const char *name, + Error **errp); + void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap); -- cgit v1.2.3 From da0bd744344adb1f285002467d7fa84699dc2fce Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:21 +0100 Subject: block: Factor out bdrv_drain_all_begin_nopoll() Provide a separate function that just quiesces the users of a node to prevent new requests from coming in, but without waiting for the already in-flight I/O to complete. This function can be used in contexts where polling is not allowed. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-2-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/block-global-state.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 1f8b54f2df..b0a3cfe6b8 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -152,6 +152,7 @@ int bdrv_inactivate_all(void); int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain_all_begin(void); +void bdrv_drain_all_begin_nopoll(void); void bdrv_drain_all_end(void); void bdrv_drain_all(void); -- cgit v1.2.3 From aead9dc9d1acc5876951885c064ec4153fbd0ed8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Dec 2022 14:18:22 +0100 Subject: graph-lock: Introduce a lock to protect block graph operations Block layer graph operations are always run under BQL in the main loop. This is proved by the assertion qemu_in_main_thread() and its wrapper macro GLOBAL_STATE_CODE. However, there are also concurrent coroutines running in other iothreads that always try to traverse the graph. Currently this is protected (among various other things) by the AioContext lock, but once this is removed, we need to make sure that reads do not happen while modifying the graph. We distinguish between writer (main loop, under BQL) that modifies the graph, and readers (all other coroutines running in various AioContext), that go through the graph edges, reading ->parents and->children. The writer (main loop) has "exclusive" access, so it first waits for any current read to finish, and then prevents incoming ones from entering while it has the exclusive access. The readers (coroutines in multiple AioContext) are free to access the graph as long the writer is not modifying the graph. In case it is, they go in a CoQueue and sleep until the writer is done. If a coroutine changes AioContext, the counter in the original and new AioContext are left intact, since the writer does not care where the reader is, but only if there is one. As a result, some AioContexts might have a negative reader count, to balance the positive count of the AioContext that took the lock. This also means that when an AioContext is deleted it may have a nonzero reader count. In that case we transfer the count to a global shared counter so that the writer is always aware of all readers. Signed-off-by: Paolo Bonzini Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-3-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/aio.h | 9 +++ include/block/block_int.h | 1 + include/block/graph-lock.h | 139 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 include/block/graph-lock.h (limited to 'include') diff --git a/include/block/aio.h b/include/block/aio.h index d128558f1d..0f65a3cc9e 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -22,6 +22,7 @@ #include "qemu/event_notifier.h" #include "qemu/thread.h" #include "qemu/timer.h" +#include "block/graph-lock.h" typedef struct BlockAIOCB BlockAIOCB; typedef void BlockCompletionFunc(void *opaque, int ret); @@ -127,6 +128,14 @@ struct AioContext { /* Used by AioContext users to protect from multi-threaded access. */ QemuRecMutex lock; + /* + * Keep track of readers and writers of the block layer graph. + * This is essential to avoid performing additions and removal + * of nodes and edges from block graph while some + * other thread is traversing it. + */ + BdrvGraphRWlock *bdrv_graph; + /* The list of registered AIO handlers. Protected by ctx->list_lock. */ AioHandlerList aio_handlers; diff --git a/include/block/block_int.h b/include/block/block_int.h index 7d50b6bbd1..b35b0138ed 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -26,6 +26,7 @@ #include "block_int-global-state.h" #include "block_int-io.h" +#include "block/graph-lock.h" /* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */ diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h new file mode 100644 index 0000000000..82edb62cfa --- /dev/null +++ b/include/block/graph-lock.h @@ -0,0 +1,139 @@ +/* + * Graph lock: rwlock to protect block layer graph manipulations (add/remove + * edges and nodes) + * + * Copyright (c) 2022 Red Hat + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef GRAPH_LOCK_H +#define GRAPH_LOCK_H + +#include "qemu/osdep.h" + +#include "qemu/coroutine.h" + +/** + * Graph Lock API + * This API provides a rwlock used to protect block layer + * graph modifications like edge (BdrvChild) and node (BlockDriverState) + * addition and removal. + * Currently we have 1 writer only, the Main loop, and many + * readers, mostly coroutines running in other AioContext thus other threads. + * + * We distinguish between writer (main loop, under BQL) that modifies the + * graph, and readers (all other coroutines running in various AioContext), + * that go through the graph edges, reading + * BlockDriverState ->parents and->children. + * + * The writer (main loop) has an "exclusive" access, so it first waits for + * current read to finish, and then prevents incoming ones from + * entering while it has the exclusive access. + * + * The readers (coroutines in multiple AioContext) are free to + * access the graph as long the writer is not modifying the graph. + * In case it is, they go in a CoQueue and sleep until the writer + * is done. + * + * If a coroutine changes AioContext, the counter in the original and new + * AioContext are left intact, since the writer does not care where is the + * reader, but only if there is one. + * As a result, some AioContexts might have a negative reader count, to + * balance the positive count of the AioContext that took the lock. + * This also means that when an AioContext is deleted it may have a nonzero + * reader count. In that case we transfer the count to a global shared counter + * so that the writer is always aware of all readers. + */ +typedef struct BdrvGraphRWlock BdrvGraphRWlock; + +/* + * register_aiocontext: + * Add AioContext @ctx to the list of AioContext. + * This list is used to obtain the total number of readers + * currently running the graph. + */ +void register_aiocontext(AioContext *ctx); + +/* + * unregister_aiocontext: + * Removes AioContext @ctx to the list of AioContext. + */ +void unregister_aiocontext(AioContext *ctx); + +/* + * bdrv_graph_wrlock: + * Start an exclusive write operation to modify the graph. This means we are + * adding or removing an edge or a node in the block layer graph. Nobody else + * is allowed to access the graph. + * + * Must only be called from outside bdrv_graph_co_rdlock. + * + * The wrlock can only be taken from the main loop, with BQL held, as only the + * main loop is allowed to modify the graph. + * + * This function polls. Callers must not hold the lock of any AioContext other + * than the current one. + */ +void bdrv_graph_wrlock(void); + +/* + * bdrv_graph_wrunlock: + * Write finished, reset global has_writer to 0 and restart + * all readers that are waiting. + */ +void bdrv_graph_wrunlock(void); + +/* + * bdrv_graph_co_rdlock: + * Read the bs graph. This usually means traversing all nodes in + * the graph, therefore it can't happen while another thread is + * modifying it. + * Increases the reader counter of the current aiocontext, + * and if has_writer is set, it means that the writer is modifying + * the graph, therefore wait in a coroutine queue. + * The writer will then wake this coroutine once it is done. + * + * This lock should be taken from Iothreads (IO_CODE() class of functions) + * because it signals the writer that there are some + * readers currently running, or waits until the current + * write is finished before continuing. + * Calling this function from the Main Loop with BQL held + * is not necessary, since the Main Loop itself is the only + * writer, thus won't be able to read and write at the same time. + * The only exception to that is when we can't take the lock in the + * function/coroutine itself, and need to delegate the caller (usually main + * loop) to take it and wait that the coroutine ends, so that + * we always signal that a reader is running. + */ +void coroutine_fn bdrv_graph_co_rdlock(void); + +/* + * bdrv_graph_rdunlock: + * Read terminated, decrease the count of readers in the current aiocontext. + * If the writer is waiting for reads to finish (has_writer == 1), signal + * the writer that we are done via aio_wait_kick() to let it continue. + */ +void coroutine_fn bdrv_graph_co_rdunlock(void); + +/* + * bdrv_graph_rd{un}lock_main_loop: + * Just a placeholder to mark where the graph rdlock should be taken + * in the main loop. It is just asserting that we are not + * in a coroutine and in GLOBAL_STATE_CODE. + */ +void bdrv_graph_rdlock_main_loop(void); +void bdrv_graph_rdunlock_main_loop(void); + +#endif /* GRAPH_LOCK_H */ + -- cgit v1.2.3 From 8aa77000c2ba8c234f72b9b4162529d02ea00188 Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:23 +0100 Subject: graph-lock: Implement guard macros Similar to the implementation in lockable.h, implement macros to automatically take and release the rdlock. Create the empty GraphLockable and GraphLockableMainloop structs only to use it as a type for G_DEFINE_AUTOPTR_CLEANUP_FUNC. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-4-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/graph-lock.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include') diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 82edb62cfa..b27d8a5fb1 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -135,5 +135,71 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); void bdrv_graph_rdlock_main_loop(void); void bdrv_graph_rdunlock_main_loop(void); +typedef struct GraphLockable { } GraphLockable; + +/* + * In C, compound literals have the lifetime of an automatic variable. + * In C++ it would be different, but then C++ wouldn't need QemuLockable + * either... + */ +#define GML_OBJ_() (&(GraphLockable) { }) + +static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x) +{ + bdrv_graph_co_rdlock(); + return x; +} + +static inline void graph_lockable_auto_unlock(GraphLockable *x) +{ + bdrv_graph_co_rdunlock(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockable, graph_lockable_auto_unlock) + +#define WITH_GRAPH_RDLOCK_GUARD_(var) \ + for (g_autoptr(GraphLockable) var = graph_lockable_auto_lock(GML_OBJ_()); \ + var; \ + graph_lockable_auto_unlock(var), var = NULL) + +#define WITH_GRAPH_RDLOCK_GUARD() \ + WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__)) + +#define GRAPH_RDLOCK_GUARD(x) \ + g_autoptr(GraphLockable) \ + glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \ + graph_lockable_auto_lock(GML_OBJ_()) + + +typedef struct GraphLockableMainloop { } GraphLockableMainloop; + +/* + * In C, compound literals have the lifetime of an automatic variable. + * In C++ it would be different, but then C++ wouldn't need QemuLockable + * either... + */ +#define GMLML_OBJ_() (&(GraphLockableMainloop) { }) + +static inline GraphLockableMainloop * +graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x) +{ + bdrv_graph_rdlock_main_loop(); + return x; +} + +static inline void +graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x) +{ + bdrv_graph_rdunlock_main_loop(); +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop, + graph_lockable_auto_unlock_mainloop) + +#define GRAPH_RDLOCK_GUARD_MAINLOOP(x) \ + g_autoptr(GraphLockableMainloop) \ + glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED = \ + graph_lockable_auto_lock_mainloop(GMLML_OBJ_()) + #endif /* GRAPH_LOCK_H */ -- cgit v1.2.3 From 702152d1c72225d318bb1626b457f87719cc2f25 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:25 +0100 Subject: Import clang-tsa.h This defines macros that allow clang to perform Thread Safety Analysis based on function and variable annotations that specify the locking rules. On non-clang compilers, the annotations are ignored. Imported tsa.h from the original repository with the pthread_mutex_t wrapper removed: https://github.com/jhi/clang-thread-safety-analysis-for-c.git Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-6-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 101 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 include/qemu/clang-tsa.h (limited to 'include') diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h new file mode 100644 index 0000000000..0a3361dfc8 --- /dev/null +++ b/include/qemu/clang-tsa.h @@ -0,0 +1,101 @@ +#ifndef CLANG_TSA_H +#define CLANG_TSA_H + +/* + * Copyright 2018 Jarkko Hietaniemi + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without + * limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html + * + * TSA is available since clang 3.6-ish. + */ +#ifdef __clang__ +# define TSA(x) __attribute__((x)) +#else +# define TSA(x) /* No TSA, make TSA attributes no-ops. */ +#endif + +/* TSA_CAPABILITY() is used to annotate typedefs: + * + * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex; + */ +#define TSA_CAPABILITY(x) TSA(capability(x)) + +/* TSA_GUARDED_BY() is used to annotate global variables, + * the data is guarded: + * + * Foo foo TSA_GUARDED_BY(mutex); + */ +#define TSA_GUARDED_BY(x) TSA(guarded_by(x)) + +/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data + * behind the pointer is guarded. + * + * Foo* ptr TSA_PT_GUARDED_BY(mutex); + */ +#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x)) + +/* The TSA_REQUIRES() is used to annotate functions: the caller of the + * function MUST hold the resource, the function will NOT release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_REQUIRES(mutex); + */ +#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__)) + +/* TSA_EXCLUDES() is used to annotate functions: the caller of the + * function MUST NOT hold resource, the function first acquires the + * resource, and then releases it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_EXCLUDES(mutex); + */ +#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__)) + +/* TSA_ACQUIRE() is used to annotate functions: the caller of the + * function MUST NOT hold the resource, the function will acquire the + * resource, but NOT release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_ACQUIRE(mutex); + */ +#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__)) + +/* TSA_RELEASE() is used to annotate functions: the caller of the + * function MUST hold the resource, but the function will then release it. + * + * More than one mutex may be specified, comma-separated. + * + * void Foo(void) TSA_RELEASE(mutex); + */ +#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__)) + +/* TSA_NO_TSA is used to annotate functions. Use only when you need to. + * + * void Foo(void) TSA_NO_TSA; + */ +#define TSA_NO_TSA TSA(no_thread_safety_analysis) + +#endif /* #ifndef CLANG_TSA_H */ -- cgit v1.2.3 From b1cc02e9463a406c6edd7ac55e356cf65a0681d7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:26 +0100 Subject: clang-tsa: Add TSA_ASSERT() macro Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-7-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h index 0a3361dfc8..211ee0ae73 100644 --- a/include/qemu/clang-tsa.h +++ b/include/qemu/clang-tsa.h @@ -98,4 +98,13 @@ */ #define TSA_NO_TSA TSA(no_thread_safety_analysis) +/* + * TSA_ASSERT() is used to annotate functions: This function will assert that + * the lock is held. When it returns, the caller of the function is assumed to + * already hold the resource. + * + * More than one mutex may be specified, comma-separated. + */ +#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__)) + #endif /* #ifndef CLANG_TSA_H */ -- cgit v1.2.3 From d9bfb9de00d4ee15908cc2f76333d6657b580dea Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:27 +0100 Subject: clang-tsa: Add macros for shared locks Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-8-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/qemu/clang-tsa.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h index 211ee0ae73..ba06fb8c92 100644 --- a/include/qemu/clang-tsa.h +++ b/include/qemu/clang-tsa.h @@ -62,6 +62,7 @@ * void Foo(void) TSA_REQUIRES(mutex); */ #define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__)) +#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__)) /* TSA_EXCLUDES() is used to annotate functions: the caller of the * function MUST NOT hold resource, the function first acquires the @@ -82,6 +83,7 @@ * void Foo(void) TSA_ACQUIRE(mutex); */ #define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__)) +#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__)) /* TSA_RELEASE() is used to annotate functions: the caller of the * function MUST hold the resource, but the function will then release it. @@ -91,6 +93,7 @@ * void Foo(void) TSA_RELEASE(mutex); */ #define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__)) +#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__)) /* TSA_NO_TSA is used to annotate functions. Use only when you need to. * @@ -106,5 +109,6 @@ * More than one mutex may be specified, comma-separated. */ #define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__)) +#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__)) #endif /* #ifndef CLANG_TSA_H */ -- cgit v1.2.3 From 3f35f82e04923affb3283b451b6d66880f266a5a Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:33 +0100 Subject: block: assert that graph read and writes are performed correctly Remove the old assert_bdrv_graph_writable, and replace it with the new version using graph-lock API. See the function documentation for more information. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-14-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block_int-global-state.h | 17 ----------------- include/block/graph-lock.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h index b49f4eb35b..2f0993f6e9 100644 --- a/include/block/block_int-global-state.h +++ b/include/block/block_int-global-state.h @@ -310,21 +310,4 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, */ void bdrv_drain_all_end_quiesce(BlockDriverState *bs); -/** - * Make sure that the function is running under both drain and BQL. - * The latter protects from concurrent writings - * from the GS API, while the former prevents concurrent reads - * from I/O. - */ -static inline void assert_bdrv_graph_writable(BlockDriverState *bs) -{ - /* - * TODO: this function is incomplete. Because the users of this - * assert lack the necessary drains, check only for BQL. - * Once the necessary drains are added, - * assert also for qatomic_read(&bs->quiesce_counter) > 0 - */ - assert(qemu_in_main_thread()); -} - #endif /* BLOCK_INT_GLOBAL_STATE_H */ diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index b27d8a5fb1..85e8a53b73 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -135,6 +135,21 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); void bdrv_graph_rdlock_main_loop(void); void bdrv_graph_rdunlock_main_loop(void); +/* + * assert_bdrv_graph_readable: + * Make sure that the reader is either the main loop, + * or there is at least a reader helding the rdlock. + * In this way an incoming writer is aware of the read and waits. + */ +void assert_bdrv_graph_readable(void); + +/* + * assert_bdrv_graph_writable: + * Make sure that the writer is the main loop and has set @has_writer, + * so that incoming readers will pause. + */ +void assert_bdrv_graph_writable(void); + typedef struct GraphLockable { } GraphLockable; /* -- cgit v1.2.3 From 4002ffdc4fa7a2fd9b5a86772311c646a73775f7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:34 +0100 Subject: graph-lock: TSA annotations for lock/unlock functions Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-15-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/graph-lock.h | 80 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 85e8a53b73..33c05b331a 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -21,6 +21,7 @@ #define GRAPH_LOCK_H #include "qemu/osdep.h" +#include "qemu/clang-tsa.h" #include "qemu/coroutine.h" @@ -57,6 +58,35 @@ */ typedef struct BdrvGraphRWlock BdrvGraphRWlock; +/* Dummy lock object to use for Thread Safety Analysis (TSA) */ +typedef struct TSA_CAPABILITY("mutex") BdrvGraphLock { +} BdrvGraphLock; + +extern BdrvGraphLock graph_lock; + +/* + * clang doesn't check consistency in locking annotations between forward + * declarations and the function definition. Having the annotation on the + * definition, but not the declaration in a header file, may give the reader + * a false sense of security because the condition actually remains unchecked + * for callers in other source files. + * + * Therefore, as a convention, for public functions, GRAPH_RDLOCK and + * GRAPH_WRLOCK annotations should be present only in the header file. + */ +#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock) +#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock) + +/* + * TSA annotations are not part of function types, so checks are defeated when + * using a function pointer. As a workaround, annotate function pointers with + * this macro that will require that the lock is at least taken while reading + * the pointer. In most cases this is equivalent to actually protecting the + * function call. + */ +#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock) +#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock) + /* * register_aiocontext: * Add AioContext @ctx to the list of AioContext. @@ -85,14 +115,14 @@ void unregister_aiocontext(AioContext *ctx); * This function polls. Callers must not hold the lock of any AioContext other * than the current one. */ -void bdrv_graph_wrlock(void); +void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA; /* * bdrv_graph_wrunlock: * Write finished, reset global has_writer to 0 and restart * all readers that are waiting. */ -void bdrv_graph_wrunlock(void); +void bdrv_graph_wrunlock(void) TSA_RELEASE(graph_lock) TSA_NO_TSA; /* * bdrv_graph_co_rdlock: @@ -116,7 +146,8 @@ void bdrv_graph_wrunlock(void); * loop) to take it and wait that the coroutine ends, so that * we always signal that a reader is running. */ -void coroutine_fn bdrv_graph_co_rdlock(void); +void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_co_rdlock(void); /* * bdrv_graph_rdunlock: @@ -124,7 +155,8 @@ void coroutine_fn bdrv_graph_co_rdlock(void); * If the writer is waiting for reads to finish (has_writer == 1), signal * the writer that we are done via aio_wait_kick() to let it continue. */ -void coroutine_fn bdrv_graph_co_rdunlock(void); +void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_co_rdunlock(void); /* * bdrv_graph_rd{un}lock_main_loop: @@ -132,8 +164,11 @@ void coroutine_fn bdrv_graph_co_rdunlock(void); * in the main loop. It is just asserting that we are not * in a coroutine and in GLOBAL_STATE_CODE. */ -void bdrv_graph_rdlock_main_loop(void); -void bdrv_graph_rdunlock_main_loop(void); +void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_rdlock_main_loop(void); + +void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA +bdrv_graph_rdunlock_main_loop(void); /* * assert_bdrv_graph_readable: @@ -150,6 +185,17 @@ void assert_bdrv_graph_readable(void); */ void assert_bdrv_graph_writable(void); +/* + * Calling this function tells TSA that we know that the lock is effectively + * taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be + * useful in intermediate stages of a conversion to using the GRAPH_RDLOCK + * macro. + */ +static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA +assume_graph_lock(void) +{ +} + typedef struct GraphLockable { } GraphLockable; /* @@ -159,13 +205,21 @@ typedef struct GraphLockable { } GraphLockable; */ #define GML_OBJ_() (&(GraphLockable) { }) -static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x) +/* + * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * cleanup attribute and would therefore complain that the graph is never + * unlocked. TSA_ASSERT() makes sure that the following calls know that we + * hold the lock while unlocking is left unchecked. + */ +static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA +graph_lockable_auto_lock(GraphLockable *x) { bdrv_graph_co_rdlock(); return x; } -static inline void graph_lockable_auto_unlock(GraphLockable *x) +static inline void TSA_NO_TSA +graph_lockable_auto_unlock(GraphLockable *x) { bdrv_graph_co_rdunlock(); } @@ -195,14 +249,20 @@ typedef struct GraphLockableMainloop { } GraphLockableMainloop; */ #define GMLML_OBJ_() (&(GraphLockableMainloop) { }) -static inline GraphLockableMainloop * +/* + * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the + * cleanup attribute and would therefore complain that the graph is never + * unlocked. TSA_ASSERT() makes sure that the following calls know that we + * hold the lock while unlocking is left unchecked. + */ +static inline GraphLockableMainloop * TSA_ASSERT(graph_lock) TSA_NO_TSA graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x) { bdrv_graph_rdlock_main_loop(); return x; } -static inline void +static inline void TSA_NO_TSA graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x) { bdrv_graph_rdunlock_main_loop(); -- cgit v1.2.3 From 303de47b2c1e20a7f326ad11976d6006f5498709 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:35 +0100 Subject: Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-16-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/block_int-common.h | 4 ++-- include/block/graph-lock.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index a6bc6b7fe9..b1f0d88307 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -898,8 +898,8 @@ struct BdrvChildClass { void (*activate)(BdrvChild *child, Error **errp); int (*inactivate)(BdrvChild *child); - void (*attach)(BdrvChild *child); - void (*detach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child); + void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child); /* * Notifies the parent that the filename of its child has changed (e.g. diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h index 33c05b331a..4c92cd8edf 100644 --- a/include/block/graph-lock.h +++ b/include/block/graph-lock.h @@ -176,14 +176,14 @@ bdrv_graph_rdunlock_main_loop(void); * or there is at least a reader helding the rdlock. * In this way an incoming writer is aware of the read and waits. */ -void assert_bdrv_graph_readable(void); +void GRAPH_RDLOCK assert_bdrv_graph_readable(void); /* * assert_bdrv_graph_writable: * Make sure that the writer is the main loop and has set @has_writer, * so that incoming readers will pause. */ -void assert_bdrv_graph_writable(void); +void GRAPH_WRLOCK assert_bdrv_graph_writable(void); /* * Calling this function tells TSA that we know that the lock is effectively -- cgit v1.2.3 From e6d3f7a602a370362bc52b0aed7dfff1a0bf726d Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:36 +0100 Subject: block-coroutine-wrapper.py: introduce annotations that take the graph rdlock Add co_wrapper_bdrv_rdlock and co_wrapper_mixed_bdrv_rdlock option to the block-coroutine-wrapper.py script. This "_bdrv_rdlock" option takes and releases the graph rdlock when a coroutine function is created. This means that when used together with "_mixed", the function marked with co_wrapper_mixed_bdrv_rdlock will support both coroutine and non-coroutine case, and in the latter case it will create a coroutine that takes and releases the rdlock. When called from a coroutine, the caller must already hold the graph lock. Example: void co_wrapper_mixed_bdrv_rdlock bdrv_f1(); Becomes static void bdrv_co_enter_f1() { bdrv_graph_co_rdlock(); bdrv_co_function(); bdrv_graph_co_rdunlock(); } void bdrv_f1() { if (qemu_in_coroutine) { assume_graph_lock(); bdrv_co_function(); } else { qemu_co_enter(bdrv_co_enter_f1); ... } } When used alone, the function will not work in coroutine context, and when called in non-coroutine context it will create a new coroutine that takes care of taking and releasing the rdlock automatically. Example: void co_wrapper_bdrv_rdlock bdrv_f1(); Becomes static void bdrv_co_enter_f1() { bdrv_graph_co_rdlock(); bdrv_co_function(); bdrv_graph_co_rdunlock(); } void bdrv_f1() { assert(!qemu_in_coroutine()); qemu_co_enter(bdrv_co_enter_f1); ... } About their usage: - co_wrapper does not take the rdlock, so it can be used also outside the block layer. - co_wrapper_mixed will be used by many blk_* functions, since the coroutine function needs to call blk_wait_while_drained() and the rdlock *must* be taken afterwards, otherwise it's a deadlock. In the future this annotation will go away, and blk_* will use co_wrapper directly. - co_wrapper_bdrv_rdlock will be used by BlockDriver callbacks, ideally by all of them in the future. - co_wrapper_mixed_bdrv_rdlock will be used by the remaining functions that are still called by coroutine and non-coroutine context. In the future this annotation will go away, as we will split such mixed functions. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-17-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block-common.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/block/block-common.h b/include/block/block-common.h index 6cf603ab06..4749c46a5e 100644 --- a/include/block/block-common.h +++ b/include/block/block-common.h @@ -40,14 +40,21 @@ * * Usage: read docs/devel/block-coroutine-wrapper.rst * - * There are 2 kind of specifiers: + * There are 4 kind of specifiers: * - co_wrapper functions can be called by only non-coroutine context, because * they always generate a new coroutine. * - co_wrapper_mixed functions can be called by both coroutine and * non-coroutine context. + * - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and + * release the graph rdlock when creating a new coroutine + * - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but + * automatically take and release the graph rdlock when creating a new + * coroutine. */ #define co_wrapper #define co_wrapper_mixed +#define co_wrapper_bdrv_rdlock +#define co_wrapper_mixed_bdrv_rdlock #include "block/dirty-bitmap.h" #include "block/blockjob.h" -- cgit v1.2.3 From 90830f595062ecf0d2c96049f5e01f3d37a2107f Mon Sep 17 00:00:00 2001 From: Emanuele Giuseppe Esposito Date: Wed, 7 Dec 2022 14:18:37 +0100 Subject: block: use co_wrapper_mixed_bdrv_rdlock in functions taking the rdlock Take the rdlock already, before we add the assertions. All these functions either read the graph recursively, or call BlockDriver callbacks that will eventually need to be protected by the graph rdlock. Do it now to all functions together, because many of these recursively call each other. For example, bdrv_co_truncate calls BlockDriver->bdrv_co_truncate, and some driver callbacks implement their own .bdrv_co_truncate by calling bdrv_flush inside. So if bdrv_flush asserts but bdrv_truncate does not take the rdlock yet, the assertion will always fail. Signed-off-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-18-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/block/block-io.h | 53 ++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/block/block-io.h b/include/block/block-io.h index 52869ea08e..2ed6214909 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -39,19 +39,24 @@ * to catch when they are accidentally called by the wrong API. */ -int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, - int64_t bytes, - BdrvRequestFlags flags); +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes, + BdrvRequestFlags flags); + int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset, - int64_t bytes, void *buf, - BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pwrite(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); -int co_wrapper_mixed bdrv_pwrite_sync(BdrvChild *child, int64_t offset, - int64_t bytes, const void *buf, - BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf, + BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite(BdrvChild *child, int64_t offset,int64_t bytes, + const void *buf, BdrvRequestFlags flags); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, + const void *buf, BdrvRequestFlags flags); + int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags); @@ -287,22 +292,26 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, void bdrv_drain(BlockDriverState *bs); -int co_wrapper_mixed +int co_wrapper_mixed_bdrv_rdlock bdrv_truncate(BdrvChild *child, int64_t offset, bool exact, PreallocMode prealloc, BdrvRequestFlags flags, Error **errp); -int co_wrapper_mixed bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, - BdrvCheckMode fix); +int co_wrapper_mixed_bdrv_rdlock +bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); /* Invalidate any cached metadata used by image formats */ -int co_wrapper_mixed bdrv_invalidate_cache(BlockDriverState *bs, - Error **errp); -int co_wrapper_mixed bdrv_flush(BlockDriverState *bs); -int co_wrapper_mixed bdrv_pdiscard(BdrvChild *child, int64_t offset, - int64_t bytes); -int co_wrapper_mixed +int co_wrapper_mixed_bdrv_rdlock +bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); + +int co_wrapper_mixed_bdrv_rdlock bdrv_flush(BlockDriverState *bs); + +int co_wrapper_mixed_bdrv_rdlock +bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); + +int co_wrapper_mixed_bdrv_rdlock bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); -int co_wrapper_mixed + +int co_wrapper_mixed_bdrv_rdlock bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /** -- cgit v1.2.3 From 1b3ff9feb942c2ad0b01ac931e99ad451ab0ef39 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Dec 2022 14:18:38 +0100 Subject: block: GRAPH_RDLOCK for functions only called by co_wrappers The generated coroutine wrappers already take care to take the lock in the non-coroutine path, and assume that the lock is already taken in the coroutine path. The only thing we need to do for the wrapped function is adding the GRAPH_RDLOCK annotation. Doing so also allows us to mark the corresponding callbacks in BlockDriver as GRAPH_RDLOCK_PTR. Signed-off-by: Kevin Wolf Message-Id: <20221207131838.239125-19-kwolf@redhat.com> Reviewed-by: Emanuele Giuseppe Esposito Signed-off-by: Kevin Wolf --- include/block/block_int-common.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index b1f0d88307..c34c525fa6 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -641,8 +641,8 @@ struct BlockDriver { /* * Invalidate any cached meta-data. */ - void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs, - Error **errp); + void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)( + BlockDriverState *bs, Error **errp); /* * Flushes all data for all layers by calling bdrv_co_flush for underlying @@ -701,12 +701,11 @@ struct BlockDriver { Error **errp); BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs); - int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); - int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs, - QEMUIOVector *qiov, - int64_t pos); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)( + BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); /* removable device specific */ bool (*bdrv_is_inserted)(BlockDriverState *bs); @@ -724,9 +723,8 @@ struct BlockDriver { * Returns 0 for completed check, -errno for internal errors. * The check results are stored in result. */ - int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs, - BdrvCheckResult *result, - BdrvCheckMode fix); + int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)( + BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix); void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); -- cgit v1.2.3