diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2019-06-04 17:22:42 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2019-06-04 17:22:42 +0100 |
commit | 47fbad45d47af8af784bb12a5719489edcd89b4c (patch) | |
tree | ece331a0cbb247b6c9b71bb4fe6ed0808a2050aa | |
parent | e2a58ff493a2e00db3e963c1839c5374500110f2 (diff) | |
parent | 11ba81c3cde0bc070cced6e8ef2835fab4fe90c8 (diff) |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches:
- block: AioContext management, part 2
- Avoid recursive block_status call (i.e. lseek() calls) if possible
- linux-aio: Drop unused BlockAIOCB submission method
- nvme: add Get/Set Feature Timestamp support
- Fix crash on commit job start with active I/O on base node
- Fix crash in bdrv_drained_end
- Fix integer overflow in qcow2 discard
# gpg: Signature made Tue 04 Jun 2019 16:20:02 BST
# gpg: using RSA key 7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream: (29 commits)
iotests: Fix duplicated diff output on failure
iotests: test big qcow2 shrink
block/io: bdrv_pdiscard: support int64_t bytes parameter
block/qcow2-refcount: add trace-point to qcow2_process_discards
block: Remove bdrv_set_aio_context()
test-bdrv-drain: Use bdrv_try_set_aio_context()
iotests: Attach new devices to node in non-default iothread
virtio-scsi-test: Test attaching new overlay with iothreads
block: Remove wrong bdrv_set_aio_context() calls
blockdev: Use bdrv_try_set_aio_context() for monitor commands
block: Move node without parents to main AioContext
test-block-iothread: BlockBackend AioContext across root node change
test-block-iothread: Test adding parent to iothread node
block: Adjust AioContexts when attaching nodes
scsi-disk: Use qdev_prop_drive_iothread
block: Add qdev_prop_drive_iothread property type
block: Add BlockBackend.ctx
block: Add Error to blk_set_aio_context()
nbd-server: Call blk_set_allow_aio_context_change()
test-block-iothread: Check filter node in test_propagate_mirror
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
72 files changed, 1144 insertions, 272 deletions
@@ -2235,6 +2235,10 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) bdrv_get_cumulative_perm(old_bs, &perm, &shared_perm); bdrv_check_perm(old_bs, NULL, perm, shared_perm, NULL, &error_abort); bdrv_set_perm(old_bs, perm, shared_perm); + + /* When the parent requiring a non-default AioContext is removed, the + * node moves back to the main AioContext */ + bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL); } if (new_bs) { @@ -2249,14 +2253,19 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * The caller must hold the AioContext lock @child_bs, but not that of @ctx + * (unless @child_bs is already in @ctx). */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp) { BdrvChild *child; + Error *local_err = NULL; int ret; ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp); @@ -2276,6 +2285,31 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, .opaque = opaque, }; + /* If the AioContexts don't match, first try to move the subtree of + * child_bs into the AioContext of the new parent. If this doesn't work, + * try moving the parent into the AioContext of child_bs instead. */ + if (bdrv_get_aio_context(child_bs) != ctx) { + ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); + if (ret < 0 && child_role->can_set_aio_ctx) { + GSList *ignore = g_slist_prepend(NULL, child);; + ctx = bdrv_get_aio_context(child_bs); + if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) { + error_free(local_err); + ret = 0; + g_slist_free(ignore); + ignore = g_slist_prepend(NULL, child);; + child_role->set_aio_ctx(child, ctx, &ignore); + } + g_slist_free(ignore); + } + if (ret < 0) { + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); + return NULL; + } + } + /* This performs the matching bdrv_set_perm() for the above check. */ bdrv_replace_child(child, child_bs); @@ -2289,6 +2323,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * If @parent_bs and @child_bs are in different AioContexts, the caller must + * hold the AioContext lock for @child_bs, but not for @parent_bs. */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -2302,11 +2339,11 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); - assert(bdrv_get_aio_context(parent_bs) == bdrv_get_aio_context(child_bs)); bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_role, + bdrv_get_aio_context(parent_bs), perm, shared_perm, parent_bs, errp); if (child == NULL) { return NULL; @@ -2517,7 +2554,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, ret = -EINVAL; goto free_exit; } - bdrv_set_aio_context(backing_hd, bdrv_get_aio_context(bs)); if (implicit_backing) { bdrv_refresh_filename(backing_hd); @@ -2884,7 +2920,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, /* Not requesting BLK_PERM_CONSISTENT_READ because we're only * looking at the header to guess the image format. This works even * in cases where a guest would not see a consistent state. */ - file = blk_new(0, BLK_PERM_ALL); + file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL); blk_insert_bs(file, file_bs, &local_err); bdrv_unref(file_bs); if (local_err) { @@ -4017,13 +4053,13 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, uint64_t perm = 0, shared = BLK_PERM_ALL; int ret; - assert(!atomic_read(&from->in_flight)); - assert(!atomic_read(&to->in_flight)); - /* Make sure that @from doesn't go away until we have successfully attached * all of its parents to @to. */ bdrv_ref(from); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); + bdrv_drained_begin(from); + /* Put all parents into @list and calculate their cumulative permissions */ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { assert(c->bs == from); @@ -4064,6 +4100,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, out: g_slist_free(list); + bdrv_drained_end(from); bdrv_unref(from); } @@ -5752,8 +5789,17 @@ static void bdrv_attach_aio_context(BlockDriverState *bs, bs->walking_aio_notifiers = false; } -/* @ignore will accumulate all visited BdrvChild object. The caller is - * responsible for freeing the list afterwards. */ +/* + * Changes the AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children and parents. + * + * The caller must own the AioContext lock for the old AioContext of bs, but it + * must not own the AioContext lock for new_context (unless new_context is the + * same as the current context of bs). + * + * @ignore will accumulate all visited BdrvChild object. The caller is + * responsible for freeing the list afterwards. + */ void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore) { @@ -5776,10 +5822,9 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, if (g_slist_find(*ignore, child)) { continue; } - if (child->role->set_aio_ctx) { - *ignore = g_slist_prepend(*ignore, child); - child->role->set_aio_ctx(child, new_context, ignore); - } + assert(child->role->set_aio_ctx); + *ignore = g_slist_prepend(*ignore, child); + child->role->set_aio_ctx(child, new_context, ignore); } bdrv_detach_aio_context(bs); @@ -5793,16 +5838,6 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs, aio_context_release(new_context); } -/* The caller must own the AioContext lock for the old AioContext of bs, but it - * must not own the AioContext lock for new_context (unless new_context is - * the same as the current context of bs). */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) -{ - GSList *ignore_list = NULL; - bdrv_set_aio_context_ignore(bs, new_context, &ignore_list); - g_slist_free(ignore_list); -} - static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx, GSList **ignore, Error **errp) { diff --git a/block/backup.c b/block/backup.c index 00f4f8af53..715e1d3be8 100644 --- a/block/backup.c +++ b/block/backup.c @@ -627,7 +627,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } /* The target must match the source in size, so no resize here either */ - job->target = blk_new(BLK_PERM_WRITE, + job->target = blk_new(job->common.job.aio_context, + BLK_PERM_WRITE, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(job->target, target, errp); diff --git a/block/block-backend.c b/block/block-backend.c index ad3e1c882d..f5d9407d20 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -42,6 +42,7 @@ struct BlockBackend { char *name; int refcnt; BdrvChild *root; + AioContext *ctx; DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ @@ -322,12 +323,13 @@ static const BdrvChildRole child_root = { * * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) { BlockBackend *blk; blk = g_new0(BlockBackend, 1); blk->refcnt = 1; + blk->ctx = ctx; blk->perm = perm; blk->shared_perm = shared_perm; blk_set_enable_write_cache(blk, true); @@ -347,6 +349,7 @@ BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm) /* * Creates a new BlockBackend, opens a new BlockDriverState, and connects both. + * The new BlockBackend is in the main AioContext. * * Just as with bdrv_open(), after having called this function the reference to * @options belongs to the block layer (even on failure). @@ -382,14 +385,14 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, perm |= BLK_PERM_RESIZE; } - blk = blk_new(perm, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL); bs = bdrv_open(filename, reference, options, flags, errp); if (!bs) { blk_unref(blk); return NULL; } - blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx, perm, BLK_PERM_ALL, blk, errp); if (!blk->root) { blk_unref(blk); @@ -800,7 +803,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; bdrv_ref(bs); - blk->root = bdrv_root_attach_child(bs, "root", &child_root, + blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx, blk->perm, blk->shared_perm, blk, errp); if (blk->root == NULL) { return -EPERM; @@ -1856,7 +1859,14 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) AioContext *blk_get_aio_context(BlockBackend *blk) { - return bdrv_get_aio_context(blk_bs(blk)); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); + assert(ctx == blk->ctx); + } + + return blk->ctx; } static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) @@ -1865,30 +1875,37 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) return blk_get_aio_context(blk_acb->blk); } -static void blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, - bool update_root_node) +static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + bool update_root_node, Error **errp) { BlockDriverState *bs = blk_bs(blk); ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + int ret; if (bs) { + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { + return ret; + } + } if (tgm->throttle_state) { bdrv_drained_begin(bs); throttle_group_detach_aio_context(tgm); throttle_group_attach_aio_context(tgm, new_context); bdrv_drained_end(bs); } - if (update_root_node) { - GSList *ignore = g_slist_prepend(NULL, blk->root); - bdrv_set_aio_context_ignore(bs, new_context, &ignore); - g_slist_free(ignore); - } } + + blk->ctx = new_context; + return 0; } -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp) { - blk_do_set_aio_context(blk, new_context, true); + return blk_do_set_aio_context(blk, new_context, true, errp); } static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx, @@ -1915,7 +1932,7 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx, GSList **ignore) { BlockBackend *blk = child->opaque; - blk_do_set_aio_context(blk, ctx, false); + blk_do_set_aio_context(blk, ctx, false, &error_abort); } void blk_add_aio_context_notifier(BlockBackend *blk, diff --git a/block/commit.c b/block/commit.c index 14e5bb394c..c815def89a 100644 --- a/block/commit.c +++ b/block/commit.c @@ -301,7 +301,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, commit_top_bs->implicit = true; } commit_top_bs->total_sectors = top->total_sectors; - bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(top)); bdrv_append(commit_top_bs, top, &local_err); if (local_err) { @@ -338,7 +337,8 @@ void commit_start(const char *job_id, BlockDriverState *bs, goto fail; } - s->base = blk_new(BLK_PERM_CONSISTENT_READ + s->base = blk_new(s->common.job.aio_context, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_CONSISTENT_READ @@ -351,7 +351,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ - s->top = blk_new(0, BLK_PERM_ALL); + s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL); ret = blk_insert_bs(s->top, top, errp); if (ret < 0) { goto fail; @@ -395,6 +395,7 @@ int bdrv_commit(BlockDriverState *bs) BlockDriverState *backing_file_bs = NULL; BlockDriverState *commit_top_bs = NULL; BlockDriver *drv = bs->drv; + AioContext *ctx; int64_t offset, length, backing_length; int ro; int64_t n; @@ -422,8 +423,9 @@ int bdrv_commit(BlockDriverState *bs) } } - src = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); - backing = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + ctx = bdrv_get_aio_context(bs); + src = blk_new(ctx, BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(src, bs, &local_err); if (ret < 0) { @@ -440,7 +442,6 @@ int bdrv_commit(BlockDriverState *bs) error_report_err(local_err); goto ro_cleanup; } - bdrv_set_aio_context(commit_top_bs, bdrv_get_aio_context(backing_file_bs)); bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort); bdrv_set_backing_hd(bs, commit_top_bs, &error_abort); diff --git a/block/crypto.c b/block/crypto.c index 3af46b805f..7351fd479d 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -257,7 +257,8 @@ static int block_crypto_co_create_generic(BlockDriverState *bs, QCryptoBlock *crypto = NULL; struct BlockCryptoCreateData data; - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { diff --git a/block/io.c b/block/io.c index 3134a60a48..9ba1bada36 100644 --- a/block/io.c +++ b/block/io.c @@ -422,11 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, return; } assert(bs->quiesce_counter > 0); - old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); /* Re-enable things in child-to-parent order */ bdrv_drain_invoke(bs, false); bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); if (old_quiesce_counter == 1) { aio_enable_external(bdrv_get_aio_context(bs)); } @@ -2092,6 +2093,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, */ assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) && align > offset - aligned_offset); + if (ret & BDRV_BLOCK_RECURSE) { + assert(ret & BDRV_BLOCK_DATA); + assert(ret & BDRV_BLOCK_OFFSET_VALID); + assert(!(ret & BDRV_BLOCK_ZERO)); + } + *pnum -= offset - aligned_offset; if (*pnum > bytes) { *pnum = bytes; @@ -2122,7 +2129,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, } } - if (want_zero && local_file && local_file != bs && + if (want_zero && ret & BDRV_BLOCK_RECURSE && + local_file && local_file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { int64_t file_pnum; @@ -2624,7 +2632,7 @@ int bdrv_flush(BlockDriverState *bs) typedef struct DiscardCo { BdrvChild *child; int64_t offset; - int bytes; + int64_t bytes; int ret; } DiscardCo; static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) @@ -2635,14 +2643,15 @@ static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque) aio_wait_kick(); } -int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) +int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + int64_t bytes) { BdrvTrackedRequest req; int max_pdiscard, ret; int head, tail, align; BlockDriverState *bs = child->bs; - if (!bs || !bs->drv) { + if (!bs || !bs->drv || !bdrv_is_inserted(bs)) { return -ENOMEDIUM; } @@ -2650,9 +2659,8 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) return -EPERM; } - ret = bdrv_check_byte_request(bs, offset, bytes); - if (ret < 0) { - return ret; + if (offset < 0 || bytes < 0 || bytes > INT64_MAX - offset) { + return -EIO; } /* Do nothing if disabled. */ @@ -2687,7 +2695,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes) assert(max_pdiscard >= bs->bl.request_alignment); while (bytes > 0) { - int num = bytes; + int64_t num = bytes; if (head) { /* Make small requests to get to alignment boundaries. */ @@ -2749,7 +2757,7 @@ out: return ret; } -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes) +int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes) { Coroutine *co; DiscardCo rwco = { diff --git a/block/linux-aio.c b/block/linux-aio.c index d4b61fb251..27100c2fd1 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -30,7 +30,6 @@ #define MAX_EVENTS 128 struct qemu_laiocb { - BlockAIOCB common; Coroutine *co; LinuxAioState *ctx; struct iocb iocb; @@ -72,7 +71,7 @@ static inline ssize_t io_event_ret(struct io_event *ev) } /* - * Completes an AIO request (calls the callback and frees the ACB). + * Completes an AIO request. */ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) { @@ -94,18 +93,15 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) } laiocb->ret = ret; - if (laiocb->co) { - /* If the coroutine is already entered it must be in ioq_submit() and - * will notice laio->ret has been filled in when it eventually runs - * later. Coroutines cannot be entered recursively so avoid doing - * that! - */ - if (!qemu_coroutine_entered(laiocb->co)) { - aio_co_wake(laiocb->co); - } - } else { - laiocb->common.cb(laiocb->common.opaque, ret); - qemu_aio_unref(laiocb); + + /* + * If the coroutine is already entered it must be in ioq_submit() and + * will notice laio->ret has been filled in when it eventually runs + * later. Coroutines cannot be entered recursively so avoid doing + * that! + */ + if (!qemu_coroutine_entered(laiocb->co)) { + aio_co_wake(laiocb->co); } } @@ -273,30 +269,6 @@ static bool qemu_laio_poll_cb(void *opaque) return true; } -static void laio_cancel(BlockAIOCB *blockacb) -{ - struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb; - struct io_event event; - int ret; - - if (laiocb->ret != -EINPROGRESS) { - return; - } - ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event); - laiocb->ret = -ECANCELED; - if (ret != 0) { - /* iocb is not cancelled, cb will be called by the event loop later */ - return; - } - - laiocb->common.cb(laiocb->common.opaque, laiocb->ret); -} - -static const AIOCBInfo laio_aiocb_info = { - .aiocb_size = sizeof(struct qemu_laiocb), - .cancel_async = laio_cancel, -}; - static void ioq_init(LaioQueue *io_q) { QSIMPLEQ_INIT(&io_q->pending); @@ -431,30 +403,6 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, return laiocb.ret; } -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type) -{ - struct qemu_laiocb *laiocb; - off_t offset = sector_num * BDRV_SECTOR_SIZE; - int ret; - - laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); - laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE; - laiocb->ctx = s; - laiocb->ret = -EINPROGRESS; - laiocb->is_read = (type == QEMU_AIO_READ); - laiocb->qiov = qiov; - - ret = laio_do_submit(fd, laiocb, offset, type); - if (ret < 0) { - qemu_aio_unref(laiocb); - return NULL; - } - - return &laiocb->common; -} - void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context) { aio_set_event_notifier(old_context, &s->e, false, NULL, NULL); diff --git a/block/mirror.c b/block/mirror.c index ec4bd9f404..f8bdb5b21b 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1543,7 +1543,6 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, BDRV_REQ_NO_FALLBACK; bs_opaque = g_new0(MirrorBDSOpaque, 1); mirror_top_bs->opaque = bs_opaque; - bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); /* bdrv_append takes ownership of the mirror_top_bs reference, need to keep * it alive until block_job_create() succeeds even if bs has no parent. */ @@ -1584,7 +1583,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, * We can allow anything except resize there.*/ target_is_backing = bdrv_chain_contains(bs, target); target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); - s->target = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE | + s->target = blk_new(s->common.job.aio_context, + BLK_PERM_WRITE | BLK_PERM_RESIZE | (target_graph_mod ? BLK_PERM_GRAPH_MOD : 0), BLK_PERM_WRITE_UNCHANGED | (target_is_backing ? BLK_PERM_CONSISTENT_READ | diff --git a/block/parallels.c b/block/parallels.c index 2747400577..00fae125d1 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -554,7 +554,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/block/qcow.c b/block/qcow.c index 1bb8fd05e2..6dee5bb792 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -844,7 +844,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, return -EIO; } - qcow_blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + qcow_blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(qcow_blk, bs, errp); if (ret < 0) { goto exit; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 4c1794f9af..22cadd79d5 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -29,6 +29,7 @@ #include "qemu/range.h" #include "qemu/bswap.h" #include "qemu/cutils.h" +#include "trace.h" static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size, uint64_t max); @@ -737,7 +738,11 @@ void qcow2_process_discards(BlockDriverState *bs, int ret) /* Discard is optional, ignore the return value */ if (ret >= 0) { - bdrv_pdiscard(bs->file, d->offset, d->bytes); + int r2 = bdrv_pdiscard(bs->file, d->offset, d->bytes); + if (r2 < 0) { + trace_qcow2_process_discards_failed_region(d->offset, d->bytes, + r2); + } } g_free(d); @@ -3444,3 +3449,35 @@ int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size) "There are no references in the refcount table."); return -EIO; } + +int qcow2_detect_metadata_preallocation(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int64_t i, end_cluster, cluster_count = 0, threshold; + int64_t file_length, real_allocation, real_clusters; + + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + return file_length; + } + + real_allocation = bdrv_get_allocated_file_size(bs->file->bs); + if (real_allocation < 0) { + return real_allocation; + } + + real_clusters = real_allocation / s->cluster_size; + threshold = MAX(real_clusters * 10 / 9, real_clusters + 2); + + end_cluster = size_to_clusters(s, file_length); + for (i = 0; i < end_cluster && cluster_count < threshold; i++) { + uint64_t refcount; + int ret = qcow2_get_refcount(bs, i, &refcount); + if (ret < 0) { + return ret; + } + cluster_count += !!refcount; + } + + return cluster_count >= threshold; +} diff --git a/block/qcow2.c b/block/qcow2.c index f2cb131048..9396d490d5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1895,6 +1895,12 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, unsigned int bytes; int status = 0; + if (!s->metadata_preallocation_checked) { + ret = qcow2_detect_metadata_preallocation(bs); + s->metadata_preallocation = (ret == 1); + s->metadata_preallocation_checked = true; + } + bytes = MIN(INT_MAX, count); qemu_co_mutex_lock(&s->lock); ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); @@ -1917,6 +1923,11 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { status |= BDRV_BLOCK_DATA; } + if (s->metadata_preallocation && (status & BDRV_BLOCK_DATA) && + (status & BDRV_BLOCK_OFFSET_VALID)) + { + status |= BDRV_BLOCK_RECURSE; + } return status; } @@ -3163,7 +3174,8 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) } /* Create BlockBackend to write to the image */ - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; @@ -4995,7 +5007,8 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } if (new_size) { - BlockBackend *blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { blk_unref(blk); diff --git a/block/qcow2.h b/block/qcow2.h index 567375e56c..fc1b0d3c1e 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -356,6 +356,9 @@ typedef struct BDRVQcow2State { int nb_threads; BdrvChild *data_file; + + bool metadata_preallocation_checked; + bool metadata_preallocation; } BDRVQcow2State; typedef struct Qcow2COWRegion { @@ -655,6 +658,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, void *cb_opaque, Error **errp); int qcow2_shrink_reftable(BlockDriverState *bs); int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size); +int qcow2_detect_metadata_preallocation(BlockDriverState *bs); /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, diff --git a/block/qed.c b/block/qed.c index dcdcd62b4a..bb4f5c9863 100644 --- a/block/qed.c +++ b/block/qed.c @@ -649,7 +649,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/block/sheepdog.c b/block/sheepdog.c index cbdfe9ab6e..f76d6ddbbc 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1800,7 +1800,8 @@ static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size, void *buf = NULL; int ret; - blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); diff --git a/block/trace-events b/block/trace-events index 1e0653ce6d..eab51497fc 100644 --- a/block/trace-events +++ b/block/trace-events @@ -92,6 +92,9 @@ qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d" qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d" qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d" +# qcow2-refcount.c +qcow2_process_discards_failed_region(uint64_t offset, uint64_t bytes, int ret) "offset 0x%" PRIx64 " bytes 0x%" PRIx64 " ret %d" + # qed-l2-cache.c qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p" qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d" diff --git a/block/vdi.c b/block/vdi.c index d7ef6628e7..b9845a4cbd 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -803,7 +803,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, goto exit; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs_file), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs_file, errp); if (ret < 0) { goto exit; diff --git a/block/vhdx.c b/block/vhdx.c index a143a57657..d6070b6fa8 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1900,7 +1900,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto delete_and_exit; diff --git a/block/vmdk.c b/block/vmdk.c index de8cb859f8..51067c774f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -2356,7 +2356,8 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int idx, if (!bs) { return NULL; } - blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); if (blk_insert_bs(blk, bs, errp)) { bdrv_unref(bs); diff --git a/block/vpc.c b/block/vpc.c index 0c279b87c8..d4776ee8a5 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -1011,7 +1011,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, return -EIO; } - blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), + BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; diff --git a/blockdev.c b/blockdev.c index 17c2d801d7..3f44b891eb 100644 --- a/blockdev.c +++ b/blockdev.c @@ -574,7 +574,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; - blk = blk_new(0, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; blk_rs->read_only = read_only; @@ -1535,6 +1535,7 @@ static void external_snapshot_prepare(BlkActionState *common, DO_UPCAST(ExternalSnapshotState, common, common); TransactionAction *action = common->action; AioContext *aio_context; + int ret; /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar * purpose but a different set of parameters */ @@ -1674,7 +1675,10 @@ static void external_snapshot_prepare(BlkActionState *common, goto out; } - bdrv_set_aio_context(state->new_bs, aio_context); + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); + if (ret < 0) { + goto out; + } /* This removes our old bs and adds the new bs. This is an operation that * can fail, so we need to do it in .prepare; undoing it for abort is @@ -3154,7 +3158,7 @@ void qmp_block_resize(bool has_device, const char *device, goto out; } - blk = blk_new(BLK_PERM_RESIZE, BLK_PERM_ALL); + blk = blk_new(bdrv_get_aio_context(bs), BLK_PERM_RESIZE, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto out; @@ -3440,6 +3444,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, int flags, job_flags = JOB_DEFAULT; int64_t size; bool set_backing_hd = false; + int ret; if (!backup->has_speed) { backup->speed = 0; @@ -3541,13 +3546,16 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, goto out; } - bdrv_set_aio_context(target_bs, aio_context); + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); + goto out; + } if (set_backing_hd) { bdrv_set_backing_hd(target_bs, source, &local_err); if (local_err) { - bdrv_unref(target_bs); - goto out; + goto unref; } } @@ -3555,11 +3563,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); if (!bmap) { error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); - bdrv_unref(target_bs); - goto out; + goto unref; } if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { - goto out; + goto unref; } } if (!backup->auto_finalize) { @@ -3573,12 +3580,13 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, backup->sync, bmap, backup->compress, backup->on_source_error, backup->on_target_error, job_flags, NULL, NULL, txn, &local_err); - bdrv_unref(target_bs); if (local_err != NULL) { error_propagate(errp, local_err); - goto out; + goto unref; } +unref: + bdrv_unref(target_bs); out: aio_context_release(aio_context); return job; @@ -3614,6 +3622,7 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, AioContext *aio_context; BlockJob *job = NULL; int job_flags = JOB_DEFAULT; + int ret; if (!backup->has_speed) { backup->speed = 0; @@ -3650,16 +3659,9 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, goto out; } - if (bdrv_get_aio_context(target_bs) != aio_context) { - if (!bdrv_has_blk(target_bs)) { - /* The target BDS is not attached, we can safely move it to another - * AioContext. */ - bdrv_set_aio_context(target_bs, aio_context); - } else { - error_setg(errp, "Target is attached to a different thread from " - "source."); - goto out; - } + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + goto out; } if (backup->has_bitmap) { @@ -3832,6 +3834,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) int flags; int64_t size; const char *format = arg->format; + int ret; bs = qmp_get_root_bs(arg->device, errp); if (!bs) { @@ -3932,7 +3935,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) goto out; } - bdrv_set_aio_context(target_bs, aio_context); + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); + goto out; + } blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs, arg->has_replaces, arg->replaces, arg->sync, @@ -3976,6 +3983,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, AioContext *aio_context; BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; Error *local_err = NULL; + int ret; bs = qmp_get_root_bs(device, errp); if (!bs) { @@ -3990,7 +3998,10 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - bdrv_set_aio_context(target_bs, aio_context); + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + goto out; + } blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs, has_replaces, replaces, sync, backing_mode, @@ -4006,7 +4017,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, has_auto_dismiss, auto_dismiss, &local_err); error_propagate(errp, local_err); - +out: aio_context_release(aio_context); } @@ -4496,7 +4507,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, old_context = bdrv_get_aio_context(bs); aio_context_acquire(old_context); - bdrv_set_aio_context(bs, new_context); + bdrv_try_set_aio_context(bs, new_context, errp); aio_context_release(old_context); } diff --git a/blockjob.c b/blockjob.c index cc5f18e7cd..931d675c0c 100644 --- a/blockjob.c +++ b/blockjob.c @@ -205,8 +205,14 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, BdrvChild *c; bdrv_ref(bs); - c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm, - job, errp); + if (job->job.aio_context != qemu_get_aio_context()) { + aio_context_release(job->job.aio_context); + } + c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context, + perm, shared_perm, job, errp); + if (job->job.aio_context != qemu_get_aio_context()) { + aio_context_acquire(job->job.aio_context); + } if (c == NULL) { return -EPERM; } @@ -392,7 +398,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job_id = bdrv_get_device_name(bs); } - blk = blk_new(perm, shared_perm); + blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { blk_unref(blk); diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt index 4f9012d154..aeb997bed5 100644 --- a/docs/devel/multiple-iothreads.txt +++ b/docs/devel/multiple-iothreads.txt @@ -109,7 +109,7 @@ The AioContext originates from the QEMU block layer, even though nowadays AioContext is a generic event loop that can be used by any QEMU subsystem. The block layer has support for AioContext integrated. Each BlockDriverState -is associated with an AioContext using bdrv_set_aio_context() and +is associated with an AioContext using bdrv_try_set_aio_context() and bdrv_get_aio_context(). This allows block layer code to process I/O inside the right AioContext. Other subsystems may wish to follow a similar approach. @@ -134,5 +134,5 @@ Long-running jobs (usually in the form of coroutines) are best scheduled in the BlockDriverState's AioContext to avoid the need to acquire/release around each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, -can be used to get a notification whenever bdrv_set_aio_context() moves a +can be used to get a notification whenever bdrv_try_set_aio_context() moves a BlockDriverState to a different AioContext. @@ -2560,7 +2560,8 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); if (bs) { - blk = local_blk = blk_new(0, BLK_PERM_ALL); + blk = local_blk = blk_new(bdrv_get_aio_context(bs), + 0, BLK_PERM_ALL); ret = blk_insert_bs(blk, bs, &err); if (ret < 0) { goto fail; diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 8c37bd314a..158c78f852 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -173,6 +173,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); unsigned i; unsigned nvqs = s->conf->num_queues; + Error *local_err = NULL; int r; if (vblk->dataplane_started || s->starting) { @@ -212,7 +213,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) vblk->dataplane_started = true; trace_virtio_blk_data_plane_start(s); - blk_set_aio_context(s->conf->conf.blk, s->ctx); + r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); + if (r < 0) { + error_report_err(local_err); + goto fail_guest_notifiers; + } /* Kick right away to begin processing requests already in vring */ for (i = 0; i < nvqs; i++) { @@ -281,8 +286,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) aio_context_acquire(s->ctx); aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); - /* Drain and switch bs back to the QEMU main loop */ - blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); + /* Drain and try to switch bs back to the QEMU main loop. If other users + * keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); aio_context_release(s->ctx); diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c index bb8f1186e4..f7ad452bbd 100644 --- a/hw/block/dataplane/xen-block.c +++ b/hw/block/dataplane/xen-block.c @@ -682,7 +682,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) } aio_context_acquire(dataplane->ctx); - blk_set_aio_context(dataplane->blk, qemu_get_aio_context()); + /* Xen doesn't have multiple users for nodes, so this can't fail */ + blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); aio_context_release(dataplane->ctx); xendev = dataplane->xendev; @@ -811,7 +812,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, } aio_context_acquire(dataplane->ctx); - blk_set_aio_context(dataplane->blk, dataplane->ctx); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); aio_context_release(dataplane->ctx); return; diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 6f19f127a5..37ccedc9f7 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -538,7 +538,7 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, qdev); assert(ret == 0); } diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 63a5b58849..30e50f7a38 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -219,6 +219,30 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, return NVME_INVALID_FIELD | NVME_DNR; } +static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, + uint64_t prp1, uint64_t prp2) +{ + QEMUSGList qsg; + QEMUIOVector iov; + uint16_t status = NVME_SUCCESS; + + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (qsg.nsg > 0) { + if (dma_buf_write(ptr, len, &qsg)) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_sglist_destroy(&qsg); + } else { + if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + status = NVME_INVALID_FIELD | NVME_DNR; + } + qemu_iovec_destroy(&iov); + } + return status; +} + static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, uint64_t prp1, uint64_t prp2) { @@ -678,7 +702,6 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) return ret; } - static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) { NvmeIdentify *c = (NvmeIdentify *)cmd; @@ -696,6 +719,57 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) } } +static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) +{ + trace_nvme_setfeat_timestamp(ts); + + n->host_timestamp = le64_to_cpu(ts); + n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); +} + +static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) +{ + uint64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + uint64_t elapsed_time = current_time - n->timestamp_set_qemu_clock_ms; + + union nvme_timestamp { + struct { + uint64_t timestamp:48; + uint64_t sync:1; + uint64_t origin:3; + uint64_t rsvd1:12; + }; + uint64_t all; + }; + + union nvme_timestamp ts; + ts.all = 0; + + /* + * If the sum of the Timestamp value set by the host and the elapsed + * time exceeds 2^48, the value returned should be reduced modulo 2^48. + */ + ts.timestamp = (n->host_timestamp + elapsed_time) & 0xffffffffffff; + + /* If the host timestamp is non-zero, set the timestamp origin */ + ts.origin = n->host_timestamp ? 0x01 : 0x00; + + trace_nvme_getfeat_timestamp(ts.all); + + return cpu_to_le64(ts.all); +} + +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +{ + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + uint64_t timestamp = nvme_get_timestamp(n); + + return nvme_dma_read_prp(n, (uint8_t *)×tamp, + sizeof(timestamp), prp1, prp2); +} + static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); @@ -710,6 +784,9 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); trace_nvme_getfeat_numq(result); break; + case NVME_TIMESTAMP: + return nvme_get_feature_timestamp(n, cmd); + break; default: trace_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -719,6 +796,24 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd) +{ + uint16_t ret; + uint64_t timestamp; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + ret = nvme_dma_write_prp(n, (uint8_t *)×tamp, + sizeof(timestamp), prp1, prp2); + if (ret != NVME_SUCCESS) { + return ret; + } + + nvme_set_timestamp(n, timestamp); + + return NVME_SUCCESS; +} + static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); @@ -735,6 +830,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->cqe.result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); break; + + case NVME_TIMESTAMP: + return nvme_set_feature_timestamp(n, cmd); + break; + default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -907,6 +1007,8 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0, NVME_AQA_ASQS(n->bar.aqa) + 1); + nvme_set_timestamp(n, 0ULL); + return 0; } @@ -1270,7 +1372,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); - id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS); + id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP); id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 56c9d4b4b1..557194ee19 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -79,6 +79,8 @@ typedef struct NvmeCtrl { uint32_t cmbloc; uint8_t *cmbuf; uint64_t irq_status; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ char *serial; NvmeNamespace *namespaces; diff --git a/hw/block/trace-events b/hw/block/trace-events index b92039a573..97a17838ed 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,8 @@ nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index ef635be4c2..31b0f5ccc8 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -609,7 +609,7 @@ static void xen_cdrom_realize(XenBlockDevice *blockdev, Error **errp) int rc; /* Set up an empty drive */ - conf->blk = blk_new(0, BLK_PERM_ALL); + conf->blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); rc = blk_attach_dev(conf->blk, DEVICE(blockdev)); if (!rc) { diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index b45a7ef54b..ba412dd2ca 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -69,8 +69,8 @@ static void set_pointer(Object *obj, Visitor *v, Property *prop, /* --- drive --- */ -static void parse_drive(DeviceState *dev, const char *str, void **ptr, - const char *propname, Error **errp) +static void do_parse_drive(DeviceState *dev, const char *str, void **ptr, + const char *propname, bool iothread, Error **errp) { BlockBackend *blk; bool blk_created = false; @@ -80,7 +80,16 @@ static void parse_drive(DeviceState *dev, const char *str, void **ptr, if (!blk) { BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL); if (bs) { - blk = blk_new(0, BLK_PERM_ALL); + /* + * If the device supports iothreads, it will make sure to move the + * block node to the right AioContext if necessary (or fail if this + * isn't possible because of other users). Devices that are not + * aware of iothreads require their BlockBackends to be in the main + * AioContext. + */ + AioContext *ctx = iothread ? bdrv_get_aio_context(bs) : + qemu_get_aio_context(); + blk = blk_new(ctx, 0, BLK_PERM_ALL); blk_created = true; ret = blk_insert_bs(blk, bs, errp); @@ -118,6 +127,18 @@ fail: } } +static void parse_drive(DeviceState *dev, const char *str, void **ptr, + const char *propname, Error **errp) +{ + do_parse_drive(dev, str, ptr, propname, false, errp); +} + +static void parse_drive_iothread(DeviceState *dev, const char *str, void **ptr, + const char *propname, Error **errp) +{ + do_parse_drive(dev, str, ptr, propname, true, errp); +} + static void release_drive(Object *obj, const char *name, void *opaque) { DeviceState *dev = DEVICE(obj); @@ -160,6 +181,12 @@ static void set_drive(Object *obj, Visitor *v, const char *name, void *opaque, set_pointer(obj, v, opaque, parse_drive, name, errp); } +static void set_drive_iothread(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + set_pointer(obj, v, opaque, parse_drive_iothread, name, errp); +} + const PropertyInfo qdev_prop_drive = { .name = "str", .description = "Node name or ID of a block device to use as a backend", @@ -168,6 +195,14 @@ const PropertyInfo qdev_prop_drive = { .release = release_drive, }; +const PropertyInfo qdev_prop_drive_iothread = { + .name = "str", + .description = "Node name or ID of a block device to use as a backend", + .get = get_drive, + .set = set_drive_iothread, + .release = release_drive, +}; + /* --- character device --- */ static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque, diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index 573b022e1e..360cd20bd8 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -168,7 +168,7 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp) return; } else { /* Anonymous BlockBackend for an empty drive */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, &dev->qdev); assert(ret == 0); } diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index e7e865ab3b..7b89ac798b 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2336,6 +2336,13 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } + if (blk_get_aio_context(s->qdev.conf.blk) != qemu_get_aio_context() && + !s->qdev.hba_supports_iothread) + { + error_setg(errp, "HBA does not support iothreads"); + return; + } + if (dev->type == TYPE_DISK) { if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) { return; @@ -2417,7 +2424,7 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) if (!dev->conf.blk) { /* Anonymous BlockBackend for an empty drive. As we put it into * dev->conf, qdev takes care of detaching on unplug. */ - dev->conf.blk = blk_new(0, BLK_PERM_ALL); + dev->conf.blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); ret = blk_attach_dev(dev->conf.blk, &dev->qdev); assert(ret == 0); } @@ -2929,13 +2936,14 @@ static const TypeInfo scsi_disk_base_info = { .abstract = true, }; -#define DEFINE_SCSI_DISK_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(SCSIDiskState, qdev.conf), \ - DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ - DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ - DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ - DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ - DEFINE_PROP_STRING("product", SCSIDiskState, product), \ +#define DEFINE_SCSI_DISK_PROPERTIES() \ + DEFINE_PROP_DRIVE_IOTHREAD("drive", SCSIDiskState, qdev.conf.blk), \ + DEFINE_BLOCK_PROPERTIES_BASE(SCSIDiskState, qdev.conf), \ + DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ + DEFINE_PROP_STRING("ver", SCSIDiskState, version), \ + DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ + DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ + DEFINE_PROP_STRING("product", SCSIDiskState, product), \ DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 839f120256..2994f0738f 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -789,28 +789,31 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) } } +static void virtio_scsi_pre_hotplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SCSIDevice *sd = SCSI_DEVICE(dev); + sd->hba_supports_iothread = true; +} + static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); VirtIOSCSI *s = VIRTIO_SCSI(vdev); SCSIDevice *sd = SCSI_DEVICE(dev); + int ret; if (s->ctx && !s->dataplane_fenced) { - AioContext *ctx; if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { return; } - ctx = blk_get_aio_context(sd->conf.blk); - if (ctx != s->ctx && ctx != qemu_get_aio_context()) { - error_setg(errp, "Cannot attach a blockdev that is using " - "a different iothread"); - return; - } virtio_scsi_acquire(s); - blk_set_aio_context(sd->conf.blk, s->ctx); + ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); virtio_scsi_release(s); - + if (ret < 0) { + return; + } } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { @@ -839,7 +842,8 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, if (s->ctx) { virtio_scsi_acquire(s); - blk_set_aio_context(sd->conf.blk, qemu_get_aio_context()); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); virtio_scsi_release(s); } @@ -986,6 +990,7 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_scsi_reset; vdc->start_ioeventfd = virtio_scsi_dataplane_start; vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; + hc->pre_plug = virtio_scsi_pre_hotplug; hc->plug = virtio_scsi_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/include/block/block.h b/include/block/block.h index 9b083e2bca..f9415ed740 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -156,10 +156,15 @@ typedef struct HDGeometry { * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this * layer, set by block layer * - * Internal flag: + * Internal flags: * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request * that the block layer recompute the answer from the returned * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. + * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for + * zeroes in file child of current block node inside + * returned region. Only valid together with both + * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not + * appear with BDRV_BLOCK_ZERO. * * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the * host offset within the returned BDS that is allocated for the @@ -184,6 +189,7 @@ typedef struct HDGeometry { #define BDRV_BLOCK_RAW 0x08 #define BDRV_BLOCK_ALLOCATED 0x10 #define BDRV_BLOCK_EOF 0x20 +#define BDRV_BLOCK_RECURSE 0x40 #define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; @@ -428,8 +434,8 @@ void bdrv_drain_all(void); AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \ cond); }) -int bdrv_pdiscard(BdrvChild *child, int64_t offset, int bytes); -int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int bytes); +int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); +int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); @@ -577,15 +583,6 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs); */ void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co); -/** - * bdrv_set_aio_context: - * - * Changes the #AioContext used for fd handlers, timers, and BHs by this - * BlockDriverState and all its children. - * - * This function must be called with iothread lock held. - */ -void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); void bdrv_set_aio_context_ignore(BlockDriverState *bs, AioContext *new_context, GSList **ignore); int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx, diff --git a/include/block/block_int.h b/include/block/block_int.h index 1eebc7c8f3..06df2bda1b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1163,6 +1163,7 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr); BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, const BdrvChildRole *child_role, + AioContext *ctx, uint64_t perm, uint64_t shared_perm, void *opaque, Error **errp); void bdrv_root_unref_child(BdrvChild *child); diff --git a/include/block/nvme.h b/include/block/nvme.h index 849a6f3fa3..3ec8efcc43 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -581,6 +581,7 @@ enum NvmeIdCtrlOncs { NVME_ONCS_WRITE_ZEROS = 1 << 3, NVME_ONCS_FEATURES = 1 << 4, NVME_ONCS_RESRVATIONS = 1 << 5, + NVME_ONCS_TIMESTAMP = 1 << 6, }; #define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) @@ -622,6 +623,7 @@ enum NvmeFeatureIds { NVME_INTERRUPT_VECTOR_CONF = 0x9, NVME_WRITE_ATOMICITY = 0xa, NVME_ASYNCHRONOUS_EVENT_CONF = 0xb, + NVME_TIMESTAMP = 0xe, NVME_SOFTWARE_PROGRESS_MARKER = 0x80 }; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index ba223dd1f1..0cb7cc74a2 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -50,9 +50,6 @@ LinuxAioState *laio_init(Error **errp); void laio_cleanup(LinuxAioState *s); int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, uint64_t offset, QEMUIOVector *qiov, int type); -BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque, int type); void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); diff --git a/include/hw/block/block.h b/include/hw/block/block.h index d06f25aa0f..607539057a 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -45,8 +45,7 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) return exp; } -#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ - DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ +#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ @@ -59,6 +58,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) ON_OFF_AUTO_AUTO), \ DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false) +#define DEFINE_BLOCK_PROPERTIES(_state, _conf) \ + DEFINE_PROP_DRIVE("drive", _state, _conf.blk), \ + DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) + #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf) \ DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0), \ DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index b6758c852e..1eae5ab056 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -28,6 +28,7 @@ extern const PropertyInfo qdev_prop_blockdev_on_error; extern const PropertyInfo qdev_prop_bios_chs_trans; extern const PropertyInfo qdev_prop_fdc_drive_type; extern const PropertyInfo qdev_prop_drive; +extern const PropertyInfo qdev_prop_drive_iothread; extern const PropertyInfo qdev_prop_netdev; extern const PropertyInfo qdev_prop_pci_devfn; extern const PropertyInfo qdev_prop_blocksize; @@ -198,6 +199,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width; DEFINE_PROP(_n, _s, _f, qdev_prop_netdev, NICPeers) #define DEFINE_PROP_DRIVE(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_drive, BlockBackend *) +#define DEFINE_PROP_DRIVE_IOTHREAD(_n, _s, _f) \ + DEFINE_PROP(_n, _s, _f, qdev_prop_drive_iothread, BlockBackend *) #define DEFINE_PROP_MACADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, MACAddr) #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index acef25faa4..426566a5c6 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -88,6 +88,7 @@ struct SCSIDevice int scsi_version; int default_scsi_version; bool needs_vpd_bl_emulation; + bool hba_supports_iothread; }; extern const VMStateDescription vmstate_scsi_device; diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 938de34fe9..733c4957eb 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -76,7 +76,7 @@ typedef struct BlockBackendPublic { ThrottleGroupMember throttle_group_member; } BlockBackendPublic; -BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm); +BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm); BlockBackend *blk_new_open(const char *filename, const char *reference, QDict *options, int flags, Error **errp); int blk_get_refcnt(BlockBackend *blk); @@ -208,7 +208,8 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); void blk_op_block_all(BlockBackend *blk, Error *reason); void blk_op_unblock_all(BlockBackend *blk, Error *reason); AioContext *blk_get_aio_context(BlockBackend *blk); -void blk_set_aio_context(BlockBackend *blk, AioContext *new_context); +int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + Error **errp); void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque); diff --git a/migration/block.c b/migration/block.c index 83c633fb3f..91f98ef44a 100644 --- a/migration/block.c +++ b/migration/block.c @@ -417,7 +417,8 @@ static int init_blk_migration(QEMUFile *f) } bmds = g_new0(BlkMigDevState, 1); - bmds->blk = blk_new(BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + bmds->blk = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); bmds->blk_name = g_strdup(bdrv_get_device_name(bs)); bmds->bulk_completed = 0; bmds->total_sectors = sectors; diff --git a/nbd/server.c b/nbd/server.c index e21bd501dc..aeca3893fe 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1484,13 +1484,15 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset, if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) { perm |= BLK_PERM_WRITE; } - blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | - BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); + blk = blk_new(bdrv_get_aio_context(bs), perm, + BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | + BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD); ret = blk_insert_bs(blk, bs, errp); if (ret < 0) { goto fail; } blk_set_enable_write_cache(blk, !writethrough); + blk_set_allow_aio_context_change(blk, true); exp->refcount = 1; QTAILQ_INIT(&exp->clients); diff --git a/qemu-img.c b/qemu-img.c index b0535919b1..07b6e2a808 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3313,7 +3313,8 @@ static int img_rebase(int argc, char **argv) BlockDriverState *base_bs = backing_bs(bs); if (base_bs) { - blk_old_backing = blk_new(BLK_PERM_CONSISTENT_READ, + blk_old_backing = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); ret = blk_insert_bs(blk_old_backing, base_bs, &local_err); @@ -3360,7 +3361,8 @@ static int img_rebase(int argc, char **argv) prefix_chain_bs = bdrv_find_backing_image(bs, out_real_path); if (prefix_chain_bs) { g_free(out_real_path); - blk_new_backing = blk_new(BLK_PERM_CONSISTENT_READ, + blk_new_backing = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); ret = blk_insert_bs(blk_new_backing, prefix_chain_bs, &local_err); diff --git a/tests/libqtest.c b/tests/libqtest.c index 8ac0c02af4..546a875913 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -1038,6 +1038,25 @@ QDict *qmp(const char *fmt, ...) return response; } +void qmp_assert_success(const char *fmt, ...) +{ + va_list ap; + QDict *response; + + va_start(ap, fmt); + response = qtest_vqmp(global_qtest, fmt, ap); + va_end(ap); + + g_assert(response); + if (!qdict_haskey(response, "return")) { + QString *s = qobject_to_json_pretty(QOBJECT(response)); + g_test_message("%s", qstring_get_str(s)); + qobject_unref(s); + } + g_assert(qdict_haskey(response, "return")); + qobject_unref(response); +} + char *hmp(const char *fmt, ...) { va_list ap; diff --git a/tests/libqtest.h b/tests/libqtest.h index a98ea15b7d..32d927755d 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -619,6 +619,17 @@ static inline void qtest_end(void) QDict *qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2); /** + * qmp_assert_success: + * @fmt...: QMP message to send to qemu, formatted like + * qobject_from_jsonf_nofail(). See parse_escape() for what's + * supported after '%'. + * + * Sends a QMP message to QEMU and asserts that a 'return' key is present in + * the response. + */ +void qmp_assert_success(const char *fmt, ...) GCC_FMT_ATTR(1, 2); + +/* * qmp_eventwait: * @s: #event event to wait for. * diff --git a/tests/perf/block/qcow2/convert-blockstatus b/tests/perf/block/qcow2/convert-blockstatus new file mode 100755 index 0000000000..a1a3c1ef43 --- /dev/null +++ b/tests/perf/block/qcow2/convert-blockstatus @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Test lseek influence on qcow2 block-status +# +# Block layer may recursively check block_status in file child of qcow2, if +# qcow2 driver returned DATA. There are several test cases to check influence +# of lseek on block_status performance. To see real difference run on tmpfs. +# +# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. +# +# Tests originally written by Kevin Wolf +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +if [ "$#" -lt 1 ]; then + echo "Usage: $0 SOURCE_FILE" + exit 1 +fi + +ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../.." >/dev/null 2>&1 && pwd )" +QEMU_IMG="$ROOT_DIR/qemu-img" +QEMU_IO="$ROOT_DIR/qemu-io" + +size=1G +src="$1" + +# test-case plain + +( +$QEMU_IMG create -f qcow2 "$src" $size +for i in $(seq 16384 -1 0); do + echo "write $((i * 65536)) 64k" +done | $QEMU_IO "$src" +) > /dev/null + +echo -n "plain: " +/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co:// + +# test-case forward + +( +$QEMU_IMG create -f qcow2 "$src" $size +for i in $(seq 0 2 16384); do + echo "write $((i * 65536)) 64k" +done | $QEMU_IO "$src" +for i in $(seq 1 2 16384); do + echo "write $((i * 65536)) 64k" +done | $QEMU_IO "$src" +) > /dev/null + +echo -n "forward: " +/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co:// + +# test-case prealloc + +$QEMU_IMG create -f qcow2 -o preallocation=metadata "$src" $size > /dev/null + +echo -n "prealloc: " +/usr/bin/time -f %e $QEMU_IMG convert -n "$src" null-co:// diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index a3deb1fcad..200660f977 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -192,6 +192,30 @@ case "$QEMU_DEFAULT_MACHINE" in esac echo +echo === Attach to node in non-default iothread === +echo + +case "$QEMU_DEFAULT_MACHINE" in + pc) + iothread="-drive file=$TEST_IMG,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on" + + # Can't add a device in the main thread while virtio-scsi0 uses the node + run_qemu $iothread -device ide-hd,drive=disk,share-rw=on + run_qemu $iothread -device virtio-blk-pci,drive=disk,share-rw=on + run_qemu $iothread -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on + run_qemu $iothread -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on + + # virtio-blk enables the iothread only when the driver initialises the + # device, so a second virtio-blk device can't be added even with the + # same iothread. virtio-scsi allows this. + run_qemu $iothread -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on + run_qemu $iothread -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on + ;; + *) + ;; +esac + +echo echo === Read-only === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 9f1cf22608..8993835b94 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -137,6 +137,9 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty +=== Attach to node in non-default iothread === + + === Read-only === Testing: -drive file=TEST_DIR/t.qcow2,if=virtio,readonly=on diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index c4743cc31c..2d811c166c 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -173,6 +173,33 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-hd,drive=disk: Device needs media, but drive is empty +=== Attach to node in non-default iothread === + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device ide-hd,drive=disk,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device ide-hd,drive=disk,share-rw=on: Cannot change iothread of active block backend + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,share-rw=on: Cannot change iothread of active block backend + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device lsi53c895a,id=lsi0 -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device scsi-hd,bus=lsi0.0,drive=disk,share-rw=on: HBA does not support iothreads + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on: Cannot change iothread of active block backend + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) QEMU_PROG: -device virtio-blk-pci,drive=disk,iohtread=iothread0,share-rw=on: Cannot change iothread of active block backend + +Testing: -drive file=TEST_DIR/t.qcow2,if=none,node-name=disk -object iothread,id=thread0 -device virtio-scsi,iothread=thread0,id=virtio-scsi0 -device scsi-hd,bus=virtio-scsi0.0,drive=disk,share-rw=on -device virtio-scsi,id=virtio-scsi1,iothread=thread0 -device scsi-hd,bus=virtio-scsi1.0,drive=disk,share-rw=on +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + + === Read-only === Testing: -drive file=TEST_DIR/t.qcow2,if=floppy,readonly=on diff --git a/tests/qemu-iotests/102 b/tests/qemu-iotests/102 index 749ff66b8a..b898df436f 100755 --- a/tests/qemu-iotests/102 +++ b/tests/qemu-iotests/102 @@ -55,7 +55,7 @@ $QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io $QEMU_IMG resize -f raw --shrink "$TEST_IMG" $((5 * 64 * 1024)) $QEMU_IO -c map "$TEST_IMG" -$QEMU_IMG map "$TEST_IMG" +$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map echo echo '=== Testing map on an image file truncated outside of qemu ===' diff --git a/tests/qemu-iotests/102.out b/tests/qemu-iotests/102.out index 4401b08fee..cd2fdc7f96 100644 --- a/tests/qemu-iotests/102.out +++ b/tests/qemu-iotests/102.out @@ -7,7 +7,8 @@ wrote 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image resized. 64 KiB (0x10000) bytes allocated at offset 0 bytes (0x0) -Offset Length Mapped to File +Offset Length File +0 0x10000 TEST_DIR/t.IMGFMT === Testing map on an image file truncated outside of qemu === diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out index 41c7291258..4d71d9dcae 100644 --- a/tests/qemu-iotests/141.out +++ b/tests/qemu-iotests/141.out @@ -42,9 +42,9 @@ Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t. {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} -{"return": {}} {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} diff --git a/tests/qemu-iotests/144.out b/tests/qemu-iotests/144.out index 55299201e4..a9a8216bea 100644 --- a/tests/qemu-iotests/144.out +++ b/tests/qemu-iotests/144.out @@ -14,10 +14,10 @@ Formatting 'TEST_DIR/tmp.qcow2', fmt=qcow2 size=536870912 backing_file=TEST_DIR/ {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "virtio0"}} +{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} {"return": {}} -{"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "virtio0"}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "virtio0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 index b4cf95096d..5be6b9c0f7 100755 --- a/tests/qemu-iotests/240 +++ b/tests/qemu-iotests/240 @@ -27,6 +27,12 @@ echo "QA output created by $seq" status=1 # failure is the default! +_cleanup() +{ + rm -f "$TEST_DIR/nbd" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + # get standard environment, filters and checks . ./common.rc . ./common.filter @@ -122,6 +128,21 @@ run_qemu <<EOF { "execute": "quit"} EOF +echo +echo === Attach a SCSI disks using the same block device as a NBD server === +echo + +run_qemu <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", "arguments": {"driver": "null-co", "node-name": "hd0", "read-only": true}} +{ "execute": "nbd-server-start", "arguments": {"addr":{"type":"unix","data":{"path":"$TEST_DIR/nbd"}}}} +{ "execute": "nbd-server-add", "arguments": {"device":"hd0"}} +{ "execute": "object-add", "arguments": {"qom-type": "iothread", "id": "iothread0"}} +{ "execute": "device_add", "arguments": {"id": "scsi0", "driver": "${virtio_scsi}", "iothread": "iothread0"}} +{ "execute": "device_add", "arguments": {"id": "scsi-hd0", "driver": "scsi-hd", "drive": "hd0", "bus": "scsi0.0"}} +{ "execute": "quit"} +EOF + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out index d76392966c..d00df50297 100644 --- a/tests/qemu-iotests/240.out +++ b/tests/qemu-iotests/240.out @@ -43,7 +43,20 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -{"error": {"class": "GenericError", "desc": "Cannot attach a blockdev that is using a different iothread"}} +{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} +{"return": {}} + +=== Attach a SCSI disks using the same block device as a NBD server === + +Testing: +QMP_VERSION +{"return": {}} {"return": {}} {"return": {}} {"return": {}} diff --git a/tests/qemu-iotests/250 b/tests/qemu-iotests/250 new file mode 100755 index 0000000000..c9c0a84a5a --- /dev/null +++ b/tests/qemu-iotests/250 @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# +# Test big discard in qcow2 shrink +# +# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=vsementsov@virtuozzo.com + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +# This test checks that qcow2_process_discards does not truncate a discard +# request > 2G. +# To reproduce bug we need to overflow int by one sequential discard, so we +# need size > 2G, bigger cluster size (as with default 64k we may have maximum +# of 512M sequential data, corresponding to one L1 entry), and we need some +# data of the beginning of the disk mapped to the end of file to prevent +# bdrv_co_truncate(bs->file) call in qcow2_co_truncate(), which might succeed +# anyway. + +disk_usage() +{ + du --block-size=1 $1 | awk '{print $1}' +} + +size=2100M +IMGOPTS="cluster_size=1M,preallocation=metadata" + +_make_test_img $size +$QEMU_IO -c 'discard 0 10M' -c 'discard 2090M 10M' \ + -c 'write 2090M 10M' -c 'write 0 10M' "$TEST_IMG" | _filter_qemu_io + +# Check that our trick with swapping first and last 10M chunks succeeded. +# Otherwise test may pass even if bdrv_pdiscard() fails in +# qcow2_process_discards() +$QEMU_IMG map "$TEST_IMG" | _filter_testdir + +before=$(disk_usage "$TEST_IMG") +$QEMU_IMG resize --shrink "$TEST_IMG" 5M +after=$(disk_usage "$TEST_IMG") + +echo "Disk usage delta: $((before - after))" + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/250.out b/tests/qemu-iotests/250.out new file mode 100644 index 0000000000..f480fd273b --- /dev/null +++ b/tests/qemu-iotests/250.out @@ -0,0 +1,16 @@ +QA output created by 250 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 preallocation=metadata +discard 10485760/10485760 bytes at offset 0 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 10485760/10485760 bytes at offset 2191523840 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 10485760/10485760 bytes at offset 2191523840 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 10485760/10485760 bytes at offset 0 +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length Mapped to File +0 0xa00000 0x82f00000 TEST_DIR/t.qcow2 +0x82a00000 0xa00000 0x500000 TEST_DIR/t.qcow2 +Image resized. +Disk usage delta: 15728640 +*** done diff --git a/tests/qemu-iotests/255 b/tests/qemu-iotests/255 new file mode 100755 index 0000000000..49433ec122 --- /dev/null +++ b/tests/qemu-iotests/255 @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# +# Test commit job graph modifications while requests are active +# +# Copyright (C) 2019 Red Hat, Inc. +# +# Creator/Owner: Kevin Wolf <kwolf@redhat.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import iotests +from iotests import imgfmt + +iotests.verify_image_format(supported_fmts=['qcow2']) + +def blockdev_create(vm, options): + result = vm.qmp_log('blockdev-create', + filters=[iotests.filter_qmp_testfiles], + job_id='job0', options=options) + + if 'return' in result: + assert result['return'] == {} + vm.run_job('job0') + iotests.log("") + +iotests.log('Finishing a commit job with background reads') +iotests.log('============================================') +iotests.log('') + +with iotests.FilePath('t.qcow2') as disk_path, \ + iotests.FilePath('t.qcow2.mid') as mid_path, \ + iotests.FilePath('t.qcow2.base') as base_path, \ + iotests.VM() as vm: + + iotests.log("=== Create backing chain and start VM ===") + iotests.log("") + + size = 128 * 1024 * 1024 + size_str = str(size) + + iotests.create_image(base_path, size) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, mid_path, size_str) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, disk_path, size_str) + + # Create a backing chain like this: + # base <- [throttled: bps-read=4096] <- mid <- overlay + + vm.add_object('throttle-group,x-bps-read=4096,id=throttle0') + vm.add_blockdev('file,filename=%s,node-name=base' % (base_path)) + vm.add_blockdev('throttle,throttle-group=throttle0,file=base,node-name=throttled') + vm.add_blockdev('file,filename=%s,node-name=mid-file' % (mid_path)) + vm.add_blockdev('qcow2,file=mid-file,node-name=mid,backing=throttled') + vm.add_drive_raw('if=none,id=overlay,driver=qcow2,file=%s,backing=mid' % (disk_path)) + + vm.launch() + + iotests.log("=== Start background read requests ===") + iotests.log("") + + def start_requests(): + vm.hmp_qemu_io('overlay', 'aio_read 0 4k') + vm.hmp_qemu_io('overlay', 'aio_read 0 4k') + + start_requests() + + iotests.log("=== Run a commit job ===") + iotests.log("") + + result = vm.qmp_log('block-commit', job_id='job0', auto_finalize=False, + device='overlay', top_node='mid') + + vm.run_job('job0', auto_finalize=False, pre_finalize=start_requests, + auto_dismiss=True) + + vm.shutdown() + +iotests.log('') +iotests.log('Closing the VM while a job is being cancelled') +iotests.log('=============================================') +iotests.log('') + +with iotests.FilePath('src.qcow2') as src_path, \ + iotests.FilePath('dst.qcow2') as dst_path, \ + iotests.VM() as vm: + + iotests.log('=== Create images and start VM ===') + iotests.log('') + + size = 128 * 1024 * 1024 + size_str = str(size) + + iotests.qemu_img_log('create', '-f', iotests.imgfmt, src_path, size_str) + iotests.qemu_img_log('create', '-f', iotests.imgfmt, dst_path, size_str) + + iotests.log(iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write 0 1M', + src_path), + filters=[iotests.filter_test_dir, iotests.filter_qemu_io]) + + vm.add_object('throttle-group,x-bps-read=4096,id=throttle0') + + vm.add_blockdev('file,node-name=src-file,filename=%s' % (src_path)) + vm.add_blockdev('%s,node-name=src,file=src-file' % (iotests.imgfmt)) + + vm.add_blockdev('file,node-name=dst-file,filename=%s' % (dst_path)) + vm.add_blockdev('%s,node-name=dst,file=dst-file' % (iotests.imgfmt)) + + vm.add_blockdev('throttle,node-name=src-throttled,' + + 'throttle-group=throttle0,file=src') + + vm.add_device('virtio-blk,drive=src-throttled') + + vm.launch() + + iotests.log('=== Start a mirror job ===') + iotests.log('') + + vm.qmp_log('blockdev-mirror', job_id='job0', device='src-throttled', + target='dst', sync='full') + + vm.qmp_log('block-job-cancel', device='job0') + vm.qmp_log('quit') + + vm.shutdown() diff --git a/tests/qemu-iotests/255.out b/tests/qemu-iotests/255.out new file mode 100644 index 0000000000..348909fdef --- /dev/null +++ b/tests/qemu-iotests/255.out @@ -0,0 +1,40 @@ +Finishing a commit job with background reads +============================================ + +=== Create backing chain and start VM === + +Formatting 'TEST_DIR/PID-t.qcow2.mid', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-t.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +=== Start background read requests === + +=== Run a commit job === + +{"execute": "block-commit", "arguments": {"auto-finalize": false, "device": "overlay", "job-id": "job0", "top-node": "mid"}} +{"return": {}} +{"execute": "job-finalize", "arguments": {"id": "job0"}} +{"return": {}} +{"data": {"id": "job0", "type": "commit"}, "event": "BLOCK_JOB_PENDING", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + +Closing the VM while a job is being cancelled +============================================= + +=== Create images and start VM === + +Formatting 'TEST_DIR/PID-src.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +Formatting 'TEST_DIR/PID-dst.qcow2', fmt=qcow2 size=134217728 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +=== Start a mirror job === + +{"execute": "blockdev-mirror", "arguments": {"device": "src-throttled", "job-id": "job0", "sync": "full", "target": "dst"}} +{"return": {}} +{"execute": "block-job-cancel", "arguments": {"device": "job0"}} +{"return": {}} +{"execute": "quit", "arguments": {}} +{"return": {}} diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 95162c6cf9..44ebf24080 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -876,7 +876,6 @@ do fi else mv $tmp.out $seq.out.bad - $diff -w "$reference" "$PWD"/$seq.out.bad status="fail" results="output mismatch (see $seq.out.bad)" printdiff=true diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 859c4b5e9f..f3b6d601b2 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -262,6 +262,8 @@ 247 rw quick 248 rw quick 249 rw auto quick +250 rw auto quick 252 rw auto backing quick 253 rw auto quick 254 rw auto backing quick +255 rw auto quick diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 7bde380d96..6bcddd8870 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -126,6 +126,11 @@ def qemu_img_pipe(*args): sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) return subp.communicate()[0] +def qemu_img_log(*args): + result = qemu_img_pipe(*args) + log(result, filters=[filter_testfiles]) + return result + def img_info_log(filename, filter_path=None, imgopts=False, extra_args=[]): args = [ 'info' ] if imgopts: @@ -533,7 +538,8 @@ class VM(qtest.QEMUQtestMachine): return result # Returns None on success, and an error string on failure - def run_job(self, job, auto_finalize=True, auto_dismiss=False): + def run_job(self, job, auto_finalize=True, auto_dismiss=False, + pre_finalize=None): error = None while True: for ev in self.get_qmp_events_filtered(wait=True): @@ -546,6 +552,8 @@ class VM(qtest.QEMUQtestMachine): error = j['error'] log('Job failed: %s' % (j['error'])) elif status == 'pending' and not auto_finalize: + if pre_finalize: + pre_finalize() self.qmp_log('job-finalize', id=job) elif status == 'concluded' and not auto_dismiss: self.qmp_log('job-dismiss', id=job) diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 5534c2adf9..12e2ecf517 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -206,7 +206,7 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -290,7 +290,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) BlockBackend *blk; BlockDriverState *bs, *backing; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs, &error_abort); @@ -353,7 +353,7 @@ static void test_nested(void) BDRVTestState *s, *backing_s; enum drain_type outer, inner; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -402,13 +402,13 @@ static void test_multiparent(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -475,13 +475,13 @@ static void test_graph_change_drain_subtree(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -555,7 +555,7 @@ static void test_graph_change_drain_all(void) BDRVTestState *a_s, *b_s; /* Create node A with a BlockBackend */ - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; @@ -571,7 +571,7 @@ static void test_graph_change_drain_all(void) g_assert_cmpint(a_s->drain_count, ==, 1); /* Create node B with a BlockBackend */ - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -672,13 +672,13 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) goto out; } - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; blk_insert_bs(blk, bs, &error_abort); - blk_set_aio_context(blk, ctx_a); + blk_set_aio_context(blk, ctx_a, &error_abort); aio_context_acquire(ctx_a); s->bh_indirection_ctx = ctx_b; @@ -742,7 +742,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } aio_context_acquire(ctx_a); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx_a); bdrv_unref(bs); @@ -883,7 +883,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, bdrv_set_backing_hd(src, src_backing, &error_abort); bdrv_unref(src_backing); - blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_src, src_overlay, &error_abort); switch (drain_node) { @@ -903,15 +903,16 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, if (use_iothread) { iothread = iothread_new(); ctx = iothread_get_aio_context(iothread); - blk_set_aio_context(blk_src, ctx); + blk_set_aio_context(blk_src, ctx, &error_abort); } else { ctx = qemu_get_aio_context(); } target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, &error_abort); - blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_target, target, &error_abort); + blk_set_allow_aio_context_change(blk_target, true); aio_context_acquire(ctx); tjob = block_job_create("job0", &test_job_driver, NULL, src, @@ -972,7 +973,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_false(job->job.paused); g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - do_drain_begin(drain_type, target); + do_drain_begin_unlocked(drain_type, target); if (drain_type == BDRV_DRAIN_ALL) { /* bdrv_drain_all() drains both src and target */ @@ -983,7 +984,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ - do_drain_end(drain_type, target); + do_drain_end_unlocked(drain_type, target); if (use_iothread) { /* paused is reset in the I/O thread, wait for it */ @@ -1001,7 +1002,8 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context()); + blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); + assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); } aio_context_release(ctx); @@ -1205,7 +1207,7 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); /* Referenced by blk now */ @@ -1368,7 +1370,7 @@ static void test_detach_indirect(bool by_parent_cb) c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); /* blk is a BB for parent-a */ - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, parent_a, &error_abort); bdrv_unref(parent_a); @@ -1460,7 +1462,7 @@ static void test_append_to_drained(void) BlockDriverState *base, *overlay; BDRVTestState *base_s, *overlay_s; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); base_s = base->opaque; blk_insert_bs(blk, base, &error_abort); @@ -1507,16 +1509,16 @@ static void test_set_aio_context(void) &error_abort); bdrv_drained_begin(bs); - bdrv_set_aio_context(bs, ctx_a); + bdrv_try_set_aio_context(bs, ctx_a, &error_abort); aio_context_acquire(ctx_a); bdrv_drained_end(bs); bdrv_drained_begin(bs); - bdrv_set_aio_context(bs, ctx_b); + bdrv_try_set_aio_context(bs, ctx_b, &error_abort); aio_context_release(ctx_a); aio_context_acquire(ctx_b); - bdrv_set_aio_context(bs, qemu_get_aio_context()); + bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort); aio_context_release(ctx_b); bdrv_drained_end(bs); diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c index 747c0bf8fc..cfeec36566 100644 --- a/tests/test-bdrv-graph-mod.c +++ b/tests/test-bdrv-graph-mod.c @@ -102,7 +102,8 @@ static void test_update_perm_tree(void) { Error *local_err = NULL; - BlockBackend *root = blk_new(BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ, + BlockBackend *root = blk_new(qemu_get_aio_context(), + BLK_PERM_WRITE | BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL & ~BLK_PERM_WRITE); BlockDriverState *bs = no_perm_node("node"); BlockDriverState *filter = pass_through_node("filter"); @@ -165,7 +166,7 @@ static void test_update_perm_tree(void) */ static void test_should_update_child(void) { - BlockBackend *root = blk_new(0, BLK_PERM_ALL); + BlockBackend *root = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); BlockDriverState *bs = no_perm_node("node"); BlockDriverState *filter = no_perm_node("filter"); BlockDriverState *target = no_perm_node("target"); diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c index fd59f02bd0..5b5d6845c0 100644 --- a/tests/test-block-backend.c +++ b/tests/test-block-backend.c @@ -37,7 +37,8 @@ static void test_drain_aio_error_flush_cb(void *opaque, int ret) static void test_drain_aio_error(void) { - BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), + BLK_PERM_ALL, BLK_PERM_ALL); BlockAIOCB *acb; bool completed = false; @@ -53,7 +54,8 @@ static void test_drain_aio_error(void) static void test_drain_all_aio_error(void) { - BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), + BLK_PERM_ALL, BLK_PERM_ALL); BlockAIOCB *acb; bool completed = false; diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c index 59f692892e..79d9cf8a57 100644 --- a/tests/test-block-iothread.c +++ b/tests/test-block-iothread.c @@ -336,20 +336,20 @@ static void test_sync_op(const void *opaque) BlockDriverState *bs; BdrvChild *c; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; blk_insert_bs(blk, bs, &error_abort); c = QLIST_FIRST(&bs->parents); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); aio_context_acquire(ctx); t->fn(c); if (t->blkfn) { t->blkfn(blk); } aio_context_release(ctx); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); bdrv_unref(bs); blk_unref(blk); @@ -415,7 +415,7 @@ static void test_attach_blockjob(void) BlockDriverState *bs; TestBlockJob *tjob; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs, &error_abort); @@ -428,7 +428,7 @@ static void test_attach_blockjob(void) aio_poll(qemu_get_aio_context(), false); } - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); tjob->n = 0; while (tjob->n == 0) { @@ -436,7 +436,7 @@ static void test_attach_blockjob(void) } aio_context_acquire(ctx); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx); tjob->n = 0; @@ -444,7 +444,7 @@ static void test_attach_blockjob(void) aio_poll(qemu_get_aio_context(), false); } - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); tjob->n = 0; while (tjob->n == 0) { @@ -453,7 +453,7 @@ static void test_attach_blockjob(void) aio_context_acquire(ctx); job_complete_sync(&tjob->common.job, &error_abort); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx); bdrv_unref(bs); @@ -481,7 +481,7 @@ static void test_propagate_basic(void) QDict *options; /* Create bs_a and its BlockBackend */ - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "bs_a", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs_a, &error_abort); @@ -497,7 +497,7 @@ static void test_propagate_basic(void) bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); /* Switch the AioContext */ - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); @@ -505,7 +505,7 @@ static void test_propagate_basic(void) /* Switch the AioContext back */ ctx = qemu_get_aio_context(); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); @@ -561,11 +561,11 @@ static void test_propagate_diamond(void) qdict_put_str(options, "raw", "bs_c"); bs_verify = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs_verify, &error_abort); /* Switch the AioContext */ - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); @@ -574,7 +574,7 @@ static void test_propagate_diamond(void) /* Switch the AioContext back */ ctx = qemu_get_aio_context(); - blk_set_aio_context(blk, ctx); + blk_set_aio_context(blk, ctx, &error_abort); g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(bs_verify) == ctx); g_assert(bdrv_get_aio_context(bs_a) == ctx); @@ -593,7 +593,7 @@ static void test_propagate_mirror(void) IOThread *iothread = iothread_new(); AioContext *ctx = iothread_get_aio_context(iothread); AioContext *main_ctx = qemu_get_aio_context(); - BlockDriverState *src, *target; + BlockDriverState *src, *target, *filter; BlockBackend *blk; Job *job; Error *local_err = NULL; @@ -610,11 +610,13 @@ static void test_propagate_mirror(void) false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, &error_abort); job = job_get("job0"); + filter = bdrv_find_node("filter_node"); /* Change the AioContext of src */ bdrv_try_set_aio_context(src, ctx, &error_abort); g_assert(bdrv_get_aio_context(src) == ctx); g_assert(bdrv_get_aio_context(target) == ctx); + g_assert(bdrv_get_aio_context(filter) == ctx); g_assert(job->aio_context == ctx); /* Change the AioContext of target */ @@ -623,9 +625,10 @@ static void test_propagate_mirror(void) aio_context_release(ctx); g_assert(bdrv_get_aio_context(src) == main_ctx); g_assert(bdrv_get_aio_context(target) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); /* With a BlockBackend on src, changing target must fail */ - blk = blk_new(0, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blk_insert_bs(blk, src, &error_abort); bdrv_try_set_aio_context(target, ctx, &local_err); @@ -635,6 +638,7 @@ static void test_propagate_mirror(void) g_assert(blk_get_aio_context(blk) == main_ctx); g_assert(bdrv_get_aio_context(src) == main_ctx); g_assert(bdrv_get_aio_context(target) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); /* ...unless we explicitly allow it */ aio_context_acquire(ctx); @@ -645,11 +649,12 @@ static void test_propagate_mirror(void) g_assert(blk_get_aio_context(blk) == ctx); g_assert(bdrv_get_aio_context(src) == ctx); g_assert(bdrv_get_aio_context(target) == ctx); + g_assert(bdrv_get_aio_context(filter) == ctx); job_cancel_sync_all(); aio_context_acquire(ctx); - blk_set_aio_context(blk, main_ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); bdrv_try_set_aio_context(target, main_ctx, &error_abort); aio_context_release(ctx); @@ -658,6 +663,69 @@ static void test_propagate_mirror(void) bdrv_unref(target); } +static void test_attach_second_node(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + AioContext *main_ctx = qemu_get_aio_context(); + BlockBackend *blk; + BlockDriverState *bs, *filter; + QDict *options; + + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + options = qdict_new(); + qdict_put_str(options, "driver", "raw"); + qdict_put_str(options, "file", "base"); + + filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + g_assert(bdrv_get_aio_context(filter) == ctx); + + blk_set_aio_context(blk, main_ctx, &error_abort); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); + + bdrv_unref(filter); + bdrv_unref(bs); + blk_unref(blk); +} + +static void test_attach_preserve_blk_ctx(void) +{ + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); + BlockBackend *blk; + BlockDriverState *bs; + + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; + + /* Add node to BlockBackend that has an iothread context assigned */ + blk_insert_bs(blk, bs, &error_abort); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + + /* Remove the node again */ + blk_remove_bs(blk); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context()); + + /* Re-attach the node */ + blk_insert_bs(blk, bs, &error_abort); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); + bdrv_unref(bs); + blk_unref(blk); +} + int main(int argc, char **argv) { int i; @@ -673,6 +741,8 @@ int main(int argc, char **argv) } g_test_add_func("/attach/blockjob", test_attach_blockjob); + g_test_add_func("/attach/second_node", test_attach_second_node); + g_test_add_func("/attach/preserve_blk_ctx", test_attach_preserve_blk_ctx); g_test_add_func("/propagate/basic", test_propagate_basic); g_test_add_func("/propagate/diamond", test_propagate_diamond); g_test_add_func("/propagate/mirror", test_propagate_mirror); diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 652d1e8359..8c91980c70 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -68,7 +68,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, static BlockBackend *create_blk(const char *name) { /* No I/O is performed on this device */ - BlockBackend *blk = blk_new(0, BLK_PERM_ALL); + BlockBackend *blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); BlockDriverState *bs; bs = bdrv_open("null-co://", NULL, NULL, 0, &error_abort); diff --git a/tests/test-throttle.c b/tests/test-throttle.c index 948a42c991..5644cf95ca 100644 --- a/tests/test-throttle.c +++ b/tests/test-throttle.c @@ -675,9 +675,9 @@ static void test_groups(void) ThrottleGroupMember *tgm1, *tgm2, *tgm3; /* No actual I/O is performed on these devices */ - blk1 = blk_new(0, BLK_PERM_ALL); - blk2 = blk_new(0, BLK_PERM_ALL); - blk3 = blk_new(0, BLK_PERM_ALL); + blk1 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + blk2 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); + blk3 = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL); blkp1 = blk_get_public(blk1); blkp2 = blk_get_public(blk2); diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index 162b31c88d..1e535cb178 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -188,6 +188,53 @@ static void test_unaligned_write_same(void *obj, void *data, qvirtio_scsi_pci_free(vs); } +static void test_iothread_attach_node(void *obj, void *data, + QGuestAllocator *t_alloc) +{ + QVirtioSCSIPCI *scsi_pci = obj; + QVirtioSCSI *scsi = &scsi_pci->scsi; + QVirtioSCSIQueues *vs; + char tmp_path[] = "/tmp/qtest.XXXXXX"; + int fd; + int ret; + + uint8_t buf[512] = { 0 }; + const uint8_t write_cdb[VIRTIO_SCSI_CDB_SIZE] = { + /* WRITE(10) to LBA 0, transfer length 1 */ + 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00 + }; + + alloc = t_alloc; + vs = qvirtio_scsi_init(scsi->vdev); + + /* Create a temporary qcow2 overlay*/ + fd = mkstemp(tmp_path); + g_assert(fd >= 0); + close(fd); + + if (!have_qemu_img()) { + g_test_message("QTEST_QEMU_IMG not set or qemu-img missing; " + "skipping snapshot test"); + goto fail; + } + + mkqcow2(tmp_path, 64); + + /* Attach the overlay to the null0 node */ + qmp_assert_success("{'execute': 'blockdev-add', 'arguments': {" + " 'driver': 'qcow2', 'node-name': 'overlay'," + " 'backing': 'null0', 'file': {" + " 'driver': 'file', 'filename': %s}}}", tmp_path); + + /* Send a request to see if the AioContext is still right */ + ret = virtio_scsi_do_command(vs, write_cdb, NULL, 0, buf, 512, NULL); + g_assert_cmphex(ret, ==, 0); + +fail: + qvirtio_scsi_pci_free(vs); + unlink(tmp_path); +} + static void *virtio_scsi_hotplug_setup(GString *cmd_line, void *arg) { g_string_append(cmd_line, @@ -204,6 +251,15 @@ static void *virtio_scsi_setup(GString *cmd_line, void *arg) return arg; } +static void *virtio_scsi_setup_iothread(GString *cmd_line, void *arg) +{ + g_string_append(cmd_line, + " -object iothread,id=thread0" + " -blockdev driver=null-co,node-name=null0" + " -device scsi-hd,drive=null0"); + return arg; +} + static void register_virtio_scsi_test(void) { QOSGraphTestOptions opts = { }; @@ -214,6 +270,13 @@ static void register_virtio_scsi_test(void) opts.before = virtio_scsi_setup; qos_add_test("unaligned-write-same", "virtio-scsi", test_unaligned_write_same, &opts); + + opts.before = virtio_scsi_setup_iothread; + opts.edge = (QOSGraphEdgeOptions) { + .extra_device_opts = "iothread=thread0", + }; + qos_add_test("iothread-attach-node", "virtio-scsi-pci", + test_iothread_attach_node, &opts); } libqos_init(register_virtio_scsi_test); |