author | Peter Maydell <peter.maydell@linaro.org> | 2018-01-08 13:44:01 +0000
---|---|---
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-01-08 13:44:01 +0000
commit | 799044b6a3a0fc63e1e020e4d9266786a2dc7a0b (patch) |
tree | 88bd01adf7fbf783edaadad1dadb185b99287ddd |
parent | 1e10eb532c40ba23db974ebfc3032cb32140ccdc (diff) |
parent | 1a63a907507fbbcfaee3f622907ec244b7eabda8 (diff) |
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
Block layer patches
# gpg: Signature made Fri 22 Dec 2017 14:09:01 GMT
# gpg: using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6
* remotes/kevin/tags/for-upstream: (35 commits)
block: Keep nodes drained between reopen_queue/multiple
commit: Simplify reopen of base
test-bdrv-drain: Test graph changes in drained section
block: Allow graph changes in subtree drained section
test-bdrv-drain: Recursive draining with multiple parents
test-bdrv-drain: Test behaviour in coroutine context
test-bdrv-drain: Tests for bdrv_subtree_drain
block: Add bdrv_subtree_drained_begin/end()
block: Don't notify parents in drain call chain
test-bdrv-drain: Test nested drain sections
block: Nested drain_end must still call callbacks
block: Don't block_job_pause_all() in bdrv_drain_all()
test-bdrv-drain: Test drain vs. block jobs
blockjob: Pause job on draining any job BDS
test-bdrv-drain: Test bs->quiesce_counter
test-bdrv-drain: Test callback for bdrv_drain
block: Make bdrv_drain() driver callbacks non-recursive
block: Assert drain_all is only called from main AioContext
block: Remove unused bdrv_requests_pending
block: Mention -drive cyls/heads/secs/trans/serial/addr in deprecation chapter
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
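
The centrepiece of the series is the new subtree drain API (bdrv_subtree_drained_begin/end). Below is a minimal sketch of the reopen convention it establishes, modelled on the updated bdrv_reopen() in block.c further down; the wrapper function name is hypothetical and error handling is trimmed:

```c
/* Hedged sketch: all affected nodes must now stay drained between
 * bdrv_reopen_queue() and bdrv_reopen_multiple(), which asserts
 * bs->quiesce_counter > 0 instead of calling bdrv_drain_all_begin()
 * itself. Modelled on the new bdrv_reopen() in this merge; the
 * function name here is illustrative only. */
static int reopen_with_subtree_drain(BlockDriverState *bs, int bdrv_flags,
                                     Error **errp)
{
    BlockReopenQueue *queue;
    int ret;

    bdrv_subtree_drained_begin(bs);

    queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
    ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, errp);

    bdrv_subtree_drained_end(bs);
    return ret;
}
```

block/replication.c and qemu-io-cmds.c in this merge follow the same pattern around the nodes they reopen.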
-rw-r--r-- | block.c | 75
-rw-r--r-- | block/commit.c | 8
-rw-r--r-- | block/io.c | 164
-rw-r--r-- | block/qcow2.c | 51
-rw-r--r-- | block/qcow2.h | 3
-rw-r--r-- | block/replication.c | 6
-rw-r--r-- | blockdev.c | 11
-rw-r--r-- | blockjob.c | 22
-rw-r--r-- | hmp.c | 6
-rw-r--r-- | hw/block/nvme.c | 349
-rw-r--r-- | hw/block/trace-events | 93
-rw-r--r-- | include/block/block.h | 15
-rw-r--r-- | include/block/block_int.h | 6
-rw-r--r-- | qapi/block-core.json | 4
-rw-r--r-- | qemu-doc.texi | 29
-rw-r--r-- | qemu-io-cmds.c | 3
-rw-r--r-- | qemu-options.hx | 19
-rw-r--r-- | tests/Makefile.include | 2
-rwxr-xr-x | tests/qemu-iotests/197 | 4
-rw-r--r-- | tests/qemu-iotests/common.filter | 3
-rw-r--r-- | tests/test-bdrv-drain.c | 651
-rw-r--r-- | vl.c | 86
22 files changed, 1294 insertions(+), 316 deletions(-)
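
Before the raw diff, one semantic point the series pins down is worth illustrating: drained sections now nest by counting, and every end delivers the callbacks. A hypothetical illustration (see "block: Nested drain_end must still call callbacks" and the nested-section cases added in tests/test-bdrv-drain.c):

```c
/* Hedged illustration of the nested-drain semantics exercised by the new
 * tests: sections balance via bs->quiesce_counter, external events are
 * toggled only on the 0 <-> 1 transitions, and every bdrv_drained_end()
 * now runs the driver/parent callbacks rather than only the outermost. */
static void nested_drain_example(BlockDriverState *bs)
{
    bdrv_drained_begin(bs);   /* counter 0 -> 1: external events disabled */
    bdrv_drained_begin(bs);   /* counter 1 -> 2: still quiesced */

    /* I/O submitted here waits until the section fully ends. */

    bdrv_drained_end(bs);     /* counter 2 -> 1: callbacks run, still drained */
    bdrv_drained_end(bs);     /* counter 1 -> 0: external events re-enabled */
}
```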
@@ -822,6 +822,18 @@ static void bdrv_child_cb_drained_end(BdrvChild *child) bdrv_drained_end(bs); } +static void bdrv_child_cb_attach(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + bdrv_apply_subtree_drain(child, bs); +} + +static void bdrv_child_cb_detach(BdrvChild *child) +{ + BlockDriverState *bs = child->opaque; + bdrv_unapply_subtree_drain(child, bs); +} + static int bdrv_child_cb_inactivate(BdrvChild *child) { BlockDriverState *bs = child->opaque; @@ -889,6 +901,8 @@ const BdrvChildRole child_file = { .inherit_options = bdrv_inherited_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .attach = bdrv_child_cb_attach, + .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, }; @@ -911,6 +925,8 @@ const BdrvChildRole child_format = { .inherit_options = bdrv_inherited_fmt_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_end = bdrv_child_cb_drained_end, + .attach = bdrv_child_cb_attach, + .detach = bdrv_child_cb_detach, .inactivate = bdrv_child_cb_inactivate, }; @@ -953,6 +969,8 @@ static void bdrv_backing_attach(BdrvChild *c) parent->backing_blocker); bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, parent->backing_blocker); + + bdrv_child_cb_attach(c); } static void bdrv_backing_detach(BdrvChild *c) @@ -963,6 +981,8 @@ static void bdrv_backing_detach(BdrvChild *c) bdrv_op_unblock_all(c->bs, parent->backing_blocker); error_free(parent->backing_blocker); parent->backing_blocker = NULL; + + bdrv_child_cb_detach(c); } /* @@ -1924,6 +1944,8 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, assert(role == &child_backing || role == &child_file); if (!backing) { + int flags = bdrv_reopen_get_flags(reopen_queue, bs); + /* Apart from the modifications below, the same permissions are * forwarded and left alone as for filters */ bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, @@ -1936,7 +1958,9 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, /* bs->file always needs to be consistent because of the metadata. We * can never allow other users to resize or write to it. */ - perm |= BLK_PERM_CONSISTENT_READ; + if (!(flags & BDRV_O_NO_IO)) { + perm |= BLK_PERM_CONSISTENT_READ; + } shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); } else { /* We want consistent read from backing files if the parent needs it. @@ -1968,17 +1992,23 @@ static void bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; + int i; if (old_bs && new_bs) { assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); } if (old_bs) { - if (old_bs->quiesce_counter && child->role->drained_end) { - child->role->drained_end(child); - } + /* Detach first so that the recursive drain sections coming from @child + * are already gone and we only end the drain sections that came from + * elsewhere. 
*/ if (child->role->detach) { child->role->detach(child); } + if (old_bs->quiesce_counter && child->role->drained_end) { + for (i = 0; i < old_bs->quiesce_counter; i++) { + child->role->drained_end(child); + } + } QLIST_REMOVE(child, next_parent); } @@ -1987,9 +2017,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, if (new_bs) { QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); if (new_bs->quiesce_counter && child->role->drained_begin) { - child->role->drained_begin(child); + for (i = 0; i < new_bs->quiesce_counter; i++) { + child->role->drained_begin(child); + } } + /* Attach only after starting new drained sections, so that recursive + * drain sections coming from @child don't get an extra .drained_begin + * callback. */ if (child->role->attach) { child->role->attach(child); } @@ -2731,6 +2766,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). */ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -2746,6 +2782,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BdrvChild *child; QDict *old_options, *explicit_options; + /* Make sure that the caller remembered to use a drained section. This is + * important to avoid graph changes between the recursive queuing here and + * bdrv_reopen_multiple(). */ + assert(bs->quiesce_counter > 0); + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QSIMPLEQ_INIT(bs_queue); @@ -2870,6 +2911,8 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * If all devices prepare successfully, then the changes are committed * to all devices. * + * All affected nodes must be drained between bdrv_reopen_queue() and + * bdrv_reopen_multiple(). 
*/ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) { @@ -2879,11 +2922,8 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er assert(bs_queue != NULL); - aio_context_release(ctx); - bdrv_drain_all_begin(); - aio_context_acquire(ctx); - QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + assert(bs_entry->state.bs->quiesce_counter > 0); if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { error_propagate(errp, local_err); goto cleanup; @@ -2912,8 +2952,6 @@ cleanup: } g_free(bs_queue); - bdrv_drain_all_end(); - return ret; } @@ -2923,12 +2961,18 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) { int ret = -1; Error *local_err = NULL; - BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); + BlockReopenQueue *queue; + bdrv_subtree_drained_begin(bs); + + queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); } + + bdrv_subtree_drained_end(bs); + return ret; } @@ -4601,10 +4645,11 @@ void bdrv_img_create(const char *filename, const char *fmt, back_flags = flags; back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + backing_options = qdict_new(); if (backing_fmt) { - backing_options = qdict_new(); qdict_put_str(backing_options, "driver", backing_fmt); } + qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true); bs = bdrv_open(full_backing, NULL, backing_options, back_flags, &local_err); @@ -4754,7 +4799,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) AioContext *ctx = bdrv_get_aio_context(bs); aio_disable_external(ctx); - bdrv_parent_drained_begin(bs); + bdrv_parent_drained_begin(bs, NULL); bdrv_drain(bs); /* ensure there are no in-flight requests */ while (aio_poll(ctx, false)) { @@ -4768,7 +4813,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) */ aio_context_acquire(new_context); bdrv_attach_aio_context(bs, new_context); - bdrv_parent_drained_end(bs); + bdrv_parent_drained_end(bs, NULL); aio_enable_external(ctx); aio_context_release(new_context); } diff --git a/block/commit.c b/block/commit.c index c5327551ce..bb6c904704 100644 --- a/block/commit.c +++ b/block/commit.c @@ -277,7 +277,6 @@ void commit_start(const char *job_id, BlockDriverState *bs, const char *filter_node_name, Error **errp) { CommitBlockJob *s; - BlockReopenQueue *reopen_queue = NULL; int orig_base_flags; BlockDriverState *iter; BlockDriverState *commit_top_bs = NULL; @@ -299,12 +298,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, /* convert base to r/w, if necessary */ orig_base_flags = bdrv_get_flags(base); if (!(orig_base_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, - orig_base_flags | BDRV_O_RDWR); - } - - if (reopen_queue) { - bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); + bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); goto fail; diff --git a/block/io.c b/block/io.c index 6773926fc1..7ea402352e 100644 --- a/block/io.c +++ b/block/io.c @@ -40,22 +40,28 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); -void bdrv_parent_drained_begin(BlockDriverState *bs) +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c, *next; 
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore) { + continue; + } if (c->role->drained_begin) { c->role->drained_begin(c); } } } -void bdrv_parent_drained_end(BlockDriverState *bs) +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) { BdrvChild *c, *next; QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { + if (c == ignore) { + continue; + } if (c->role->drained_end) { c->role->drained_end(c); } @@ -134,29 +140,13 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) assert(old >= 1); } -/* Check if any requests are in-flight (including throttled requests) */ -bool bdrv_requests_pending(BlockDriverState *bs) -{ - BdrvChild *child; - - if (atomic_read(&bs->in_flight)) { - return true; - } - - QLIST_FOREACH(child, &bs->children, next) { - if (bdrv_requests_pending(child->bs)) { - return true; - } - } - - return false; -} - typedef struct { Coroutine *co; BlockDriverState *bs; bool done; bool begin; + bool recursive; + BdrvChild *parent; } BdrvCoDrainData; static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) @@ -175,8 +165,10 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) bdrv_wakeup(bs); } -static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) +/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive) { + BdrvChild *child, *tmp; BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin}; if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || @@ -187,16 +179,19 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data); bdrv_coroutine_enter(bs, data.co); BDRV_POLL_WHILE(bs, !data.done); + + if (recursive) { + QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) { + bdrv_drain_invoke(child->bs, begin, true); + } + } } -static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) +static bool bdrv_drain_recurse(BlockDriverState *bs) { BdrvChild *child, *tmp; bool waited; - /* Ensure any pending metadata writes are submitted to bs->file. 
*/ - bdrv_drain_invoke(bs, begin); - /* Wait for drained requests to finish */ waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); @@ -215,7 +210,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) */ bdrv_ref(bs); } - waited |= bdrv_drain_recurse(bs, begin); + waited |= bdrv_drain_recurse(bs); if (in_main_loop) { bdrv_unref(bs); } @@ -224,6 +219,11 @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin) return waited; } +static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent); +static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent); + static void bdrv_co_drain_bh_cb(void *opaque) { BdrvCoDrainData *data = opaque; @@ -232,9 +232,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) bdrv_dec_in_flight(bs); if (data->begin) { - bdrv_drained_begin(bs); + bdrv_do_drained_begin(bs, data->recursive, data->parent); } else { - bdrv_drained_end(bs); + bdrv_do_drained_end(bs, data->recursive, data->parent); } data->done = true; @@ -242,7 +242,8 @@ static void bdrv_co_drain_bh_cb(void *opaque) } static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, - bool begin) + bool begin, bool recursive, + BdrvChild *parent) { BdrvCoDrainData data; @@ -256,6 +257,8 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, .bs = bs, .done = false, .begin = begin, + .recursive = recursive, + .parent = parent, }; bdrv_inc_in_flight(bs); aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), @@ -267,35 +270,97 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, assert(data.done); } -void bdrv_drained_begin(BlockDriverState *bs) +void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent) { + BdrvChild *child, *next; + if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, true); + bdrv_co_yield_to_drain(bs, true, recursive, parent); return; } + /* Stop things in parent-to-child order */ if (atomic_fetch_inc(&bs->quiesce_counter) == 0) { aio_disable_external(bdrv_get_aio_context(bs)); - bdrv_parent_drained_begin(bs); } - bdrv_drain_recurse(bs, true); + bdrv_parent_drained_begin(bs, parent); + bdrv_drain_invoke(bs, true, false); + bdrv_drain_recurse(bs); + + if (recursive) { + bs->recursive_quiesce_counter++; + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_do_drained_begin(child->bs, true, child); + } + } } -void bdrv_drained_end(BlockDriverState *bs) +void bdrv_drained_begin(BlockDriverState *bs) { + bdrv_do_drained_begin(bs, false, NULL); +} + +void bdrv_subtree_drained_begin(BlockDriverState *bs) +{ + bdrv_do_drained_begin(bs, true, NULL); +} + +void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + BdrvChild *parent) +{ + BdrvChild *child, *next; + int old_quiesce_counter; + if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs, false); + bdrv_co_yield_to_drain(bs, false, recursive, parent); return; } assert(bs->quiesce_counter > 0); - if (atomic_fetch_dec(&bs->quiesce_counter) > 1) { - return; + old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter); + + /* Re-enable things in child-to-parent order */ + bdrv_drain_invoke(bs, false, false); + bdrv_parent_drained_end(bs, parent); + if (old_quiesce_counter == 1) { + aio_enable_external(bdrv_get_aio_context(bs)); } - bdrv_parent_drained_end(bs); - bdrv_drain_recurse(bs, false); - aio_enable_external(bdrv_get_aio_context(bs)); + if (recursive) { + bs->recursive_quiesce_counter--; + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + 
bdrv_do_drained_end(child->bs, true, child); + } + } +} + +void bdrv_drained_end(BlockDriverState *bs) +{ + bdrv_do_drained_end(bs, false, NULL); +} + +void bdrv_subtree_drained_end(BlockDriverState *bs) +{ + bdrv_do_drained_end(bs, true, NULL); +} + +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +{ + int i; + + for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { + bdrv_do_drained_begin(child->bs, true, child); + } +} + +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) +{ + int i; + + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { + bdrv_do_drained_end(child->bs, true, child); + } } /* @@ -342,14 +407,20 @@ void bdrv_drain_all_begin(void) BdrvNextIterator it; GSList *aio_ctxs = NULL, *ctx; - block_job_pause_all(); + /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread + * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on + * nodes in several different AioContexts, so make sure we're in the main + * context. */ + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); + /* Stop things in parent-to-child order */ aio_context_acquire(aio_context); - bdrv_parent_drained_begin(bs); aio_disable_external(aio_context); + bdrv_parent_drained_begin(bs, NULL); + bdrv_drain_invoke(bs, true, true); aio_context_release(aio_context); if (!g_slist_find(aio_ctxs, aio_context)) { @@ -372,7 +443,7 @@ void bdrv_drain_all_begin(void) aio_context_acquire(aio_context); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { if (aio_context == bdrv_get_aio_context(bs)) { - waited |= bdrv_drain_recurse(bs, true); + waited |= bdrv_drain_recurse(bs); } } aio_context_release(aio_context); @@ -390,14 +461,13 @@ void bdrv_drain_all_end(void) for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); + /* Re-enable things in child-to-parent order */ aio_context_acquire(aio_context); + bdrv_drain_invoke(bs, false, true); + bdrv_parent_drained_end(bs, NULL); aio_enable_external(aio_context); - bdrv_parent_drained_end(bs); - bdrv_drain_recurse(bs, false); aio_context_release(aio_context); } - - block_job_resume_all(); } void bdrv_drain_all(void) diff --git a/block/qcow2.c b/block/qcow2.c index 1914a940e5..4348b2c0c5 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1672,34 +1672,12 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, return status; } -/* handle reading after the end of the backing file */ -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t offset, int bytes) -{ - uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE; - int n1; - - if ((offset + bytes) <= bs_size) { - return bytes; - } - - if (offset >= bs_size) { - n1 = 0; - } else { - n1 = bs_size - offset; - } - - qemu_iovec_memset(qiov, n1, 0, bytes - n1); - - return n1; -} - static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { BDRVQcow2State *s = bs->opaque; - int offset_in_cluster, n1; + int offset_in_cluster; int ret; unsigned int cur_bytes; /* number of bytes in current iteration */ uint64_t cluster_offset = 0; @@ -1734,26 +1712,13 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, case QCOW2_CLUSTER_UNALLOCATED: if (bs->backing) { - /* read from the base image */ - n1 = qcow2_backing_read1(bs->backing->bs, 
&hd_qiov, - offset, cur_bytes); - if (n1 > 0) { - QEMUIOVector local_qiov; - - qemu_iovec_init(&local_qiov, hd_qiov.niov); - qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1); - - BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); - qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_preadv(bs->backing, offset, n1, - &local_qiov, 0); - qemu_co_mutex_lock(&s->lock); - - qemu_iovec_destroy(&local_qiov); - - if (ret < 0) { - goto fail; - } + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); + qemu_co_mutex_unlock(&s->lock); + ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, + &hd_qiov, 0); + qemu_co_mutex_lock(&s->lock); + if (ret < 0) { + goto fail; } } else { /* Note: in this case, no need to wait */ diff --git a/block/qcow2.h b/block/qcow2.h index 6f0ff15dd0..46c8cf44ec 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -528,9 +528,6 @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset) } /* qcow2.c functions */ -int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t sector_num, int nb_sectors); - int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size, int refcount_order, bool generous_increase, uint64_t *refblock_count); diff --git a/block/replication.c b/block/replication.c index e41e293d2b..b1ea3caa4b 100644 --- a/block/replication.c +++ b/block/replication.c @@ -394,6 +394,9 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, new_secondary_flags = s->orig_secondary_flags; } + bdrv_subtree_drained_begin(s->hidden_disk->bs); + bdrv_subtree_drained_begin(s->secondary_disk->bs); + if (orig_hidden_flags != new_hidden_flags) { reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, new_hidden_flags); @@ -409,6 +412,9 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, reopen_queue, &local_err); error_propagate(errp, local_err); } + + bdrv_subtree_drained_end(s->hidden_disk->bs); + bdrv_subtree_drained_end(s->secondary_disk->bs); } static void backup_job_cleanup(BlockDriverState *bs) diff --git a/blockdev.c b/blockdev.c index 9c3a430cfb..29d569a24e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -735,10 +735,6 @@ QemuOptsList qemu_legacy_drive_opts = { .type = QEMU_OPT_STRING, .help = "chs translation (auto, lba, none)", },{ - .name = "boot", - .type = QEMU_OPT_BOOL, - .help = "(deprecated, ignored)", - },{ .name = "addr", .type = QEMU_OPT_STRING, .help = "pci address (virtio only)", @@ -873,13 +869,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) goto fail; } - /* Deprecated option boot=[on|off] */ - if (qemu_opt_get(legacy_opts, "boot") != NULL) { - fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be " - "ignored. Future versions will reject this parameter. 
Please " - "update your scripts.\n"); - } - /* Other deprecated options */ if (!qtest_enabled()) { for (i = 0; i < ARRAY_SIZE(deprecated); i++) { diff --git a/blockjob.c b/blockjob.c index 6173e4728c..f5cea84e73 100644 --- a/blockjob.c +++ b/blockjob.c @@ -234,26 +234,23 @@ static char *child_job_get_parent_desc(BdrvChild *c) job->id); } -static const BdrvChildRole child_job = { - .get_parent_desc = child_job_get_parent_desc, - .stay_at_node = true, -}; - -static void block_job_drained_begin(void *opaque) +static void child_job_drained_begin(BdrvChild *c) { - BlockJob *job = opaque; + BlockJob *job = c->opaque; block_job_pause(job); } -static void block_job_drained_end(void *opaque) +static void child_job_drained_end(BdrvChild *c) { - BlockJob *job = opaque; + BlockJob *job = c->opaque; block_job_resume(job); } -static const BlockDevOps block_job_dev_ops = { - .drained_begin = block_job_drained_begin, - .drained_end = block_job_drained_end, +static const BdrvChildRole child_job = { + .get_parent_desc = child_job_get_parent_desc, + .drained_begin = child_job_drained_begin, + .drained_end = child_job_drained_end, + .stay_at_node = true, }; void block_job_remove_all_bdrv(BlockJob *job) @@ -715,7 +712,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort); bs->job = job; - blk_set_dev_ops(blk, &block_job_dev_ops, job); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); QLIST_INSERT_HEAD(&block_jobs, job, job_list); @@ -2318,7 +2318,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) { BlockBackend *blk; BlockBackend *local_blk = NULL; - AioContext *aio_context; const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; @@ -2338,9 +2337,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) } } - aio_context = blk_get_aio_context(blk); - aio_context_acquire(aio_context); - /* * Notably absent: Proper permission management. This is sad, but it seems * almost impossible to achieve without changing the semantics and thereby @@ -2368,8 +2364,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) */ qemuio_command(blk, command); - aio_context_release(aio_context); - fail: blk_unref(local_blk); hmp_handle_error(mon, &err); diff --git a/hw/block/nvme.c b/hw/block/nvme.c index e529e88e4e..1ac356d3a5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -34,8 +34,17 @@ #include "qapi/visitor.h" #include "sysemu/block-backend.h" +#include "qemu/log.h" +#include "trace.h" #include "nvme.h" +#define NVME_GUEST_ERR(trace, fmt, ...) 
\ + do { \ + (trace_##trace)(__VA_ARGS__); \ + qemu_log_mask(LOG_GUEST_ERROR, #trace \ + " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \ + } while (0) + static void nvme_process_sq(void *opaque); static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) @@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) { if (cq->irq_enabled) { if (msix_enabled(&(n->parent_obj))) { + trace_nvme_irq_msix(cq->vector); msix_notify(&(n->parent_obj), cq->vector); } else { + trace_nvme_irq_pin(); pci_irq_pulse(&n->parent_obj); } + } else { + trace_nvme_irq_masked(); } } @@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, trans_len = MIN(len, trans_len); int num_prps = (len >> n->page_bits) + 1; - if (!prp1) { + if (unlikely(!prp1)) { + trace_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { @@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, } len -= trans_len; if (len) { - if (!prp2) { + if (unlikely(!prp2)) { + trace_nvme_err_invalid_prp2_missing(); goto unmap; } if (len > n->page_size) { @@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, uint64_t prp_ent = le64_to_cpu(prp_list[i]); if (i == n->max_prp_ents - 1 && len > n->page_size) { - if (!prp_ent || prp_ent & (n->page_size - 1)) { + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { + trace_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, prp_ent = le64_to_cpu(prp_list[i]); } - if (!prp_ent || prp_ent & (n->page_size - 1)) { + if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { + trace_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, i++; } } else { - if (prp2 & (n->page_size - 1)) { + if (unlikely(prp2 & (n->page_size - 1))) { + trace_nvme_err_invalid_prp2_align(prp2); goto unmap; } if (qsg->nsg) { @@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, QEMUIOVector iov; uint16_t status = NVME_SUCCESS; + trace_nvme_dma_read(prp1, prp2); + if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { return NVME_INVALID_FIELD | NVME_DNR; } if (qsg.nsg > 0) { - if (dma_buf_read(ptr, len, &qsg)) { + if (unlikely(dma_buf_read(ptr, len, &qsg))) { + trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_sglist_destroy(&qsg); } else { - if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) { + if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) { + trace_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_iovec_destroy(&iov); @@ -273,7 +295,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS); uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS); - if (slba + nlb > ns->id_ns.nsze) { + if (unlikely(slba + nlb > ns->id_ns.nsze)) { + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -301,8 +324,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; enum BlockAcctType acct = is_write ? 
BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; - if ((slba + nlb) > ns->id_ns.nsze) { + trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); + + if (unlikely((slba + nlb) > ns->id_ns.nsze)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); + trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -336,7 +362,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) NvmeNamespace *ns; uint32_t nsid = le32_to_cpu(cmd->nsid); - if (nsid == 0 || nsid > n->num_namespaces) { + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } @@ -350,6 +377,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); default: + trace_nvme_err_invalid_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -373,10 +401,13 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) NvmeCQueue *cq; uint16_t qid = le16_to_cpu(c->qid); - if (!qid || nvme_check_sqid(n, qid)) { + if (unlikely(!qid || nvme_check_sqid(n, qid))) { + trace_nvme_err_invalid_del_sq(qid); return NVME_INVALID_QID | NVME_DNR; } + trace_nvme_del_sq(qid); + sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { req = QTAILQ_FIRST(&sq->out_req_list); @@ -439,19 +470,26 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->sq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - if (!cqid || nvme_check_cqid(n, cqid)) { + trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); + + if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { + trace_nvme_err_invalid_create_sq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } - if (!sqid || !nvme_check_sqid(n, sqid)) { + if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { + trace_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + trace_nvme_err_invalid_create_sq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } - if (!prp1 || prp1 & (n->page_size - 1)) { + if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { + trace_nvme_err_invalid_create_sq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } - if (!(NVME_SQ_FLAGS_PC(qflags))) { + if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { + trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } sq = g_malloc0(sizeof(*sq)); @@ -476,14 +514,17 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) NvmeCQueue *cq; uint16_t qid = le16_to_cpu(c->qid); - if (!qid || nvme_check_cqid(n, qid)) { + if (unlikely(!qid || nvme_check_cqid(n, qid))) { + trace_nvme_err_invalid_del_cq_cqid(qid); return NVME_INVALID_CQID | NVME_DNR; } cq = n->cq[qid]; - if (!QTAILQ_EMPTY(&cq->sq_list)) { + if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { + trace_nvme_err_invalid_del_cq_notempty(qid); return NVME_INVALID_QUEUE_DEL; } + trace_nvme_del_cq(qid); nvme_free_cq(cq, n); return NVME_SUCCESS; } @@ -516,19 +557,27 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->cq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - if (!cqid || !nvme_check_cqid(n, cqid)) { + trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, + NVME_CQ_FLAGS_IEN(qflags) != 0); + + if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { + trace_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_CQID | 
NVME_DNR; } - if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { + if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { + trace_nvme_err_invalid_create_cq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } - if (!prp1) { + if (unlikely(!prp1)) { + trace_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } - if (vector > n->num_queues) { + if (unlikely(vector > n->num_queues)) { + trace_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } - if (!(NVME_CQ_FLAGS_PC(qflags))) { + if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { + trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } @@ -543,6 +592,8 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); + trace_nvme_identify_ctrl(); + return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1, prp2); } @@ -554,11 +605,15 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); - if (nsid == 0 || nsid > n->num_namespaces) { + trace_nvme_identify_ns(nsid); + + if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { + trace_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } ns = &n->namespaces[nsid - 1]; + return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1, prp2); } @@ -573,6 +628,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; + trace_nvme_identify_nslist(min_nsid); + list = g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { if (i < min_nsid) { @@ -601,6 +658,7 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) case 0x02: return nvme_identify_nslist(n, c); default: + trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; } } @@ -613,11 +671,14 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); + trace_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + trace_nvme_getfeat_numq(result); break; default: + trace_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } @@ -635,10 +696,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: + trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, + ((dw11 >> 16) & 0xFFFF) + 1, + n->num_queues - 1, n->num_queues - 1); req->cqe.result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); break; default: + trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } return NVME_SUCCESS; @@ -662,6 +727,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); default: + trace_nvme_err_invalid_admin_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -721,15 +787,78 @@ static int nvme_start_ctrl(NvmeCtrl *n) uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12; uint32_t page_size = 1 << page_bits; - if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq || - n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) || - NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) || - NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) || - NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) || - NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) || - NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) || - NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) || - !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) { + if (unlikely(n->cq[0])) { + trace_nvme_err_startfail_cq(); + return -1; + } + if (unlikely(n->sq[0])) { + trace_nvme_err_startfail_sq(); + return -1; + } + if (unlikely(!n->bar.asq)) { + trace_nvme_err_startfail_nbarasq(); + return -1; + } + if (unlikely(!n->bar.acq)) { + trace_nvme_err_startfail_nbaracq(); + return -1; + } + if (unlikely(n->bar.asq & (page_size - 1))) { + trace_nvme_err_startfail_asq_misaligned(n->bar.asq); + return -1; + } + if (unlikely(n->bar.acq & (page_size - 1))) { + trace_nvme_err_startfail_acq_misaligned(n->bar.acq); + return -1; + } + if (unlikely(NVME_CC_MPS(n->bar.cc) < + NVME_CAP_MPSMIN(n->bar.cap))) { + trace_nvme_err_startfail_page_too_small( + NVME_CC_MPS(n->bar.cc), + NVME_CAP_MPSMIN(n->bar.cap)); + return -1; + } + if (unlikely(NVME_CC_MPS(n->bar.cc) > + NVME_CAP_MPSMAX(n->bar.cap))) { + trace_nvme_err_startfail_page_too_large( + NVME_CC_MPS(n->bar.cc), + NVME_CAP_MPSMAX(n->bar.cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(n->bar.cc) < + NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { + trace_nvme_err_startfail_cqent_too_small( + NVME_CC_IOCQES(n->bar.cc), + NVME_CTRL_CQES_MIN(n->bar.cap)); + return -1; + } + if (unlikely(NVME_CC_IOCQES(n->bar.cc) > + NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { + trace_nvme_err_startfail_cqent_too_large( + NVME_CC_IOCQES(n->bar.cc), + NVME_CTRL_CQES_MAX(n->bar.cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(n->bar.cc) < + NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { + trace_nvme_err_startfail_sqent_too_small( + NVME_CC_IOSQES(n->bar.cc), + NVME_CTRL_SQES_MIN(n->bar.cap)); + return -1; + } + if (unlikely(NVME_CC_IOSQES(n->bar.cc) > + NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { + trace_nvme_err_startfail_sqent_too_large( + NVME_CC_IOSQES(n->bar.cc), + NVME_CTRL_SQES_MAX(n->bar.cap)); + return -1; + 
} + if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { + trace_nvme_err_startfail_asqent_sz_zero(); + return -1; + } + if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { + trace_nvme_err_startfail_acqent_sz_zero(); return -1; } @@ -749,16 +878,48 @@ static int nvme_start_ctrl(NvmeCtrl *n) static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, unsigned size) { + if (unlikely(offset & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, + "MMIO write not 32-bit aligned," + " offset=0x%"PRIx64"", offset); + /* should be ignored, fall through for now */ + } + + if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, + "MMIO write smaller than 32-bits," + " offset=0x%"PRIx64", size=%u", + offset, size); + /* should be ignored, fall through for now */ + } + switch (offset) { - case 0xc: + case 0xc: /* INTMS */ + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask set" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } n->bar.intms |= data & 0xffffffff; n->bar.intmc = n->bar.intms; + trace_nvme_mmio_intm_set(data & 0xffffffff, + n->bar.intmc); break; - case 0x10: + case 0x10: /* INTMC */ + if (unlikely(msix_enabled(&(n->parent_obj)))) { + NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + "undefined access to interrupt mask clr" + " when MSI-X is enabled"); + /* should be ignored, fall through for now */ + } n->bar.intms &= ~(data & 0xffffffff); n->bar.intmc = n->bar.intms; + trace_nvme_mmio_intm_clr(data & 0xffffffff, + n->bar.intmc); break; - case 0x14: + case 0x14: /* CC */ + trace_nvme_mmio_cfg(data & 0xffffffff); /* Windows first sends data, then sends enable bit */ if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) @@ -768,40 +929,82 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { n->bar.cc = data; - if (nvme_start_ctrl(n)) { + if (unlikely(nvme_start_ctrl(n))) { + trace_nvme_err_startfail(); n->bar.csts = NVME_CSTS_FAILED; } else { + trace_nvme_mmio_start_success(); n->bar.csts = NVME_CSTS_READY; } } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { + trace_nvme_mmio_stopped(); nvme_clear_ctrl(n); n->bar.csts &= ~NVME_CSTS_READY; } if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { - nvme_clear_ctrl(n); - n->bar.cc = data; - n->bar.csts |= NVME_CSTS_SHST_COMPLETE; + trace_nvme_mmio_shutdown_set(); + nvme_clear_ctrl(n); + n->bar.cc = data; + n->bar.csts |= NVME_CSTS_SHST_COMPLETE; } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { - n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; - n->bar.cc = data; + trace_nvme_mmio_shutdown_cleared(); + n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; + n->bar.cc = data; + } + break; + case 0x1C: /* CSTS */ + if (data & (1 << 4)) { + NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, + "attempted to W1C CSTS.NSSRO" + " but CAP.NSSRS is zero (not supported)"); + } else if (data != 0) { + NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, + "attempted to set a read only bit" + " of controller status"); + } + break; + case 0x20: /* NSSR */ + if (data == 0x4E564D65) { + trace_nvme_ub_mmiowr_ssreset_unsupported(); + } else { + /* The spec says that writes of other values have no effect */ + return; } break; - case 0x24: + case 0x24: /* AQA */ n->bar.aqa = data & 0xffffffff; + trace_nvme_mmio_aqattr(data & 0xffffffff); break; - case 0x28: + case 0x28: /* ASQ */ n->bar.asq = data; + 
trace_nvme_mmio_asqaddr(data); break; - case 0x2c: + case 0x2c: /* ASQ hi */ n->bar.asq |= data << 32; + trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); break; - case 0x30: + case 0x30: /* ACQ */ + trace_nvme_mmio_acqaddr(data); n->bar.acq = data; break; - case 0x34: + case 0x34: /* ACQ hi */ n->bar.acq |= data << 32; + trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); break; + case 0x38: /* CMBLOC */ + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, + "invalid write to reserved CMBLOC" + " when CMBSZ is zero, ignored"); + return; + case 0x3C: /* CMBSZ */ + NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, + "invalid write to read only CMBSZ, ignored"); + return; default: + NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, + "invalid MMIO write," + " offset=0x%"PRIx64", data=%"PRIx64"", + offset, data); break; } } @@ -812,9 +1015,26 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) uint8_t *ptr = (uint8_t *)&n->bar; uint64_t val = 0; + if (unlikely(addr & (sizeof(uint32_t) - 1))) { + NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, + "MMIO read not 32-bit aligned," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } else if (unlikely(size < sizeof(uint32_t))) { + NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, + "MMIO read smaller than 32-bits," + " offset=0x%"PRIx64"", addr); + /* should RAZ, fall through for now */ + } + if (addr < sizeof(n->bar)) { memcpy(&val, ptr + addr, size); + } else { + NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, + "MMIO read beyond last register," + " offset=0x%"PRIx64", returning 0", addr); } + return val; } @@ -822,22 +1042,36 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) { uint32_t qid; - if (addr & ((1 << 2) - 1)) { + if (unlikely(addr & ((1 << 2) - 1))) { + NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, + "doorbell write not 32-bit aligned," + " offset=0x%"PRIx64", ignoring", addr); return; } if (((addr - 0x1000) >> 2) & 1) { + /* Completion queue doorbell write */ + uint16_t new_head = val & 0xffff; int start_sqs; NvmeCQueue *cq; qid = (addr - (0x1000 + (1 << 2))) >> 3; - if (nvme_check_cqid(n, qid)) { + if (unlikely(nvme_check_cqid(n, qid))) { + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, + "completion queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); return; } cq = n->cq[qid]; - if (new_head >= cq->size) { + if (unlikely(new_head >= cq->size)) { + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, + "completion queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_head=%"PRIu16", ignoring", + qid, new_head); return; } @@ -855,16 +1089,27 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) nvme_isr_notify(n, cq); } } else { + /* Submission queue doorbell write */ + uint16_t new_tail = val & 0xffff; NvmeSQueue *sq; qid = (addr - 0x1000) >> 3; - if (nvme_check_sqid(n, qid)) { + if (unlikely(nvme_check_sqid(n, qid))) { + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, + "submission queue doorbell write" + " for nonexistent queue," + " sqid=%"PRIu32", ignoring", qid); return; } sq = n->sq[qid]; - if (new_tail >= sq->size) { + if (unlikely(new_tail >= sq->size)) { + NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, + "submission queue doorbell write value" + " beyond queue size, sqid=%"PRIu32"," + " new_tail=%"PRIu16", ignoring", + qid, new_tail); return; } diff --git a/hw/block/trace-events b/hw/block/trace-events index 962a3bfa24..5acd495207 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -11,6 +11,99 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int 
start, int num_reqs, uint6 hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d" hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d" +# hw/block/nvme.c +# nvme traces for successful events +nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +nvme_irq_pin(void) "pulsing IRQ pin" +nvme_irq_masked(void) "IRQ is masked" +nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" +nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16"" +nvme_identify_ctrl(void) "identify controller" +nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" +nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s" +nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +nvme_mmio_start_success(void) "setting controller enable bit succeeded" +nvme_mmio_stopped(void) "cleared controller enable bit" +nvme_mmio_shutdown_set(void) "shutdown bit set" +nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" + +# nvme traces for error conditions +nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" +nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" +nvme_err_invalid_field(void) "invalid field" +nvme_err_invalid_prp(void) "invalid PRP" +nvme_err_invalid_sgl(void) "invalid SGL" +nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" +nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +nvme_err_invalid_admin_opc(uint8_t opc) "invalid 
admin opcode 0x%"PRIx8"" +nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" +nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" +nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" +nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" +nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" +nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" 
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +nvme_err_startfail(void) "setting controller enable bit failed" + +# Traces for undefined behavior +nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" +nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" +nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" + # hw/block/xen_disk.c xen_disk_alloc(char *name) "%s" xen_disk_init(char *name) "%s" diff --git a/include/block/block.h b/include/block/block.h index c05cac57e5..9b12774ddf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -585,7 +585,7 @@ void bdrv_io_unplug(BlockDriverState *bs); * Begin a quiesced section of all users of @bs. This is part of * bdrv_drained_begin. */ -void bdrv_parent_drained_begin(BlockDriverState *bs); +void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore); /** * bdrv_parent_drained_end: @@ -593,7 +593,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs); * End a quiesced section of all users of @bs. This is part of * bdrv_drained_end. */ -void bdrv_parent_drained_end(BlockDriverState *bs); +void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore); /** * bdrv_drained_begin: @@ -608,12 +608,23 @@ void bdrv_parent_drained_end(BlockDriverState *bs); void bdrv_drained_begin(BlockDriverState *bs); /** + * Like bdrv_drained_begin, but recursively begins a quiesced section for + * exclusive access to all child nodes as well. + */ +void bdrv_subtree_drained_begin(BlockDriverState *bs); + +/** * bdrv_drained_end: * * End a quiescent section started by bdrv_drained_begin(). */ void bdrv_drained_end(BlockDriverState *bs); +/** + * End a quiescent section started by bdrv_subtree_drained_begin(). 
+ */ +void bdrv_subtree_drained_end(BlockDriverState *bs); + void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child, Error **errp); void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index a5482775ec..29cafa4236 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -717,6 +717,8 @@ struct BlockDriverState { /* Accessed with atomic ops. */ int quiesce_counter; + int recursive_quiesce_counter; + unsigned int write_gen; /* Current data generation */ /* Protected by reqs_lock. */ @@ -768,6 +770,9 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); +void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + int get_tmp_filename(char *filename, int size); BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, const char *filename); @@ -1045,7 +1050,6 @@ bool blk_dev_is_tray_open(BlockBackend *blk); bool blk_dev_is_medium_locked(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes); -bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); diff --git a/qapi/block-core.json b/qapi/block-core.json index a8cdbc300b..e94a6881b2 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3918,6 +3918,10 @@ # does not support all kinds of operations, all kinds of children, nor # all block drivers. # +# FIXME Removing children from a quorum node means introducing gaps in the +# child indices. This cannot be represented in the 'children' list of +# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename(). +# # Warning: The data in a new quorum child MUST be consistent with that of # the rest of the array. # diff --git a/qemu-doc.texi b/qemu-doc.texi index ae90f7199e..8d0c809ad5 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -2589,12 +2589,6 @@ deprecated. @section System emulator command line arguments -@subsection -drive boot=on|off (since 1.3.0) - -The ``boot=on|off'' option to the ``-drive'' argument is -ignored. Applications should use the ``bootindex=N'' parameter -to set an absolute ordering between devices instead. - @subsection -tdf (since 1.3.0) The ``-tdf'' argument is ignored. The behaviour implemented @@ -2690,20 +2684,27 @@ longer be directly supported in QEMU. The ``-drive if=scsi'' argument is replaced by the the ``-device BUS-TYPE'' argument combined with ``-drive if=none''. +@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0) + +The drive geometry arguments are replaced by the the geometry arguments +that can be specified with the ``-device'' parameter. + +@subsection -drive serial=... (since 2.10.0) + +The drive serial argument is replaced by the the serial argument +that can be specified with the ``-device'' parameter. + +@subsection -drive addr=... (since 2.10.0) + +The drive addr argument is replaced by the the addr argument +that can be specified with the ``-device'' parameter. + @subsection -net dump (since 2.10.0) The ``--net dump'' argument is now replaced with the ``-object filter-dump'' argument which works in combination with the modern ``-netdev`` backends instead. 
-@subsection -hdachs (since 2.10.0) - -The ``-hdachs'' argument is now a synonym for setting -the ``cyls'', ``heads'', ``secs'', and ``trans'' properties -on the ``ide-hd'' device using the ``-device'' argument. -The new syntax allows different settings to be provided -per disk. - @subsection -usbdevice (since 2.10.0) The ``-usbdevice DEV'' argument is now a synonym for setting diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index de8e3de726..a6a70fc3dc 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -2013,8 +2013,11 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL; qemu_opts_reset(&reopen_opts); + bdrv_subtree_drained_begin(bs); brq = bdrv_reopen_queue(NULL, bs, opts, flags); bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); + bdrv_subtree_drained_end(bs); + if (local_err) { error_report_err(local_err); } else { diff --git a/qemu-options.hx b/qemu-options.hx index 6d56f4bdb3..678181c599 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -846,8 +846,8 @@ of available connectors of a given interface type. @item media=@var{media} This option defines the type of the media: disk or cdrom. @item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}] -These options have the same definition as they have in @option{-hdachs}. -These parameters are deprecated, use the corresponding parameters +Force disk physical geometry and the optional BIOS translation (trans=none or +lba). These parameters are deprecated, use the corresponding parameters of @code{-device} instead. @item snapshot=@var{snapshot} @var{snapshot} is "on" or "off" and controls snapshot mode for the given drive @@ -1027,21 +1027,6 @@ the raw disk image you use is not written back. You can however force the write back by pressing @key{C-a s} (@pxref{disk_images}). ETEXI -DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \ - "-hdachs c,h,s[,t]\n" \ - " force hard disk 0 physical geometry and the optional BIOS\n" \ - " translation (t=none or lba) (usually QEMU can guess them)\n", - QEMU_ARCH_ALL) -STEXI -@item -hdachs @var{c},@var{h},@var{s},[,@var{t}] -@findex -hdachs -Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <= -@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS -translation mode (@var{t}=none, lba or auto). Usually QEMU can guess -all those parameters. This option is deprecated, please use -@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead. 
-ETEXI - DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev, "-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n" " [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n" diff --git a/tests/Makefile.include b/tests/Makefile.include index 77f8183117..39a4b5359d 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -80,6 +80,7 @@ gcov-files-test-thread-pool-y = thread-pool.c gcov-files-test-hbitmap-y = util/hbitmap.c check-unit-y += tests/test-hbitmap$(EXESUF) gcov-files-test-hbitmap-y = blockjob.c +check-unit-y += tests/test-bdrv-drain$(EXESUF) check-unit-y += tests/test-blockjob$(EXESUF) check-unit-y += tests/test-blockjob-txn$(EXESUF) check-unit-y += tests/test-x86-cpuid$(EXESUF) @@ -595,6 +596,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y) tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) +tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y) tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y) diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197 index 887eb4f496..5e869fe2b7 100755 --- a/tests/qemu-iotests/197 +++ b/tests/qemu-iotests/197 @@ -60,6 +60,10 @@ echo '=== Copy-on-read ===' echo # Prep the images +# VPC rounds image sizes to a specific geometry, force a specific size. +if [ "$IMGFMT" = "vpc" ]; then + IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size") +fi _make_test_img 4G $QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \ diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index d9237799e9..f08248bfd9 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -134,7 +134,8 @@ _filter_img_create() -e "s# log_size=[0-9]\\+##g" \ -e "s# refcount_bits=[0-9]\\+##g" \ -e "s# key-secret=[a-zA-Z0-9]\\+##g" \ - -e "s# iter-time=[0-9]\\+##g" + -e "s# iter-time=[0-9]\\+##g" \ + -e "s# force_size=\\(on\\|off\\)##g" } _filter_img_info() diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c new file mode 100644 index 0000000000..d760e2b243 --- /dev/null +++ b/tests/test-bdrv-drain.c @@ -0,0 +1,651 @@ +/* + * Block node draining tests + * + * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "block/block.h" +#include "block/blockjob_int.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" + +typedef struct BDRVTestState { + int drain_count; +} BDRVTestState; + +static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) +{ + BDRVTestState *s = bs->opaque; + s->drain_count++; +} + +static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) +{ + BDRVTestState *s = bs->opaque; + s->drain_count--; +} + +static void bdrv_test_close(BlockDriverState *bs) +{ + BDRVTestState *s = bs->opaque; + g_assert_cmpint(s->drain_count, >, 0); +} + +static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + /* We want this request to stay until the polling loop in drain waits for + * it to complete. We need to sleep a while as bdrv_drain_invoke() comes + * first and polls its result, too, but it shouldn't accidentally complete + * this request yet. */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + + return 0; +} + +static BlockDriver bdrv_test = { + .format_name = "test", + .instance_size = sizeof(BDRVTestState), + + .bdrv_close = bdrv_test_close, + .bdrv_co_preadv = bdrv_test_co_preadv, + + .bdrv_co_drain_begin = bdrv_test_co_drain_begin, + .bdrv_co_drain_end = bdrv_test_co_drain_end, + + .bdrv_child_perm = bdrv_format_default_perms, +}; + +static void aio_ret_cb(void *opaque, int ret) +{ + int *aio_ret = opaque; + *aio_ret = ret; +} + +typedef struct CallInCoroutineData { + void (*entry)(void); + bool done; +} CallInCoroutineData; + +static coroutine_fn void call_in_coroutine_entry(void *opaque) +{ + CallInCoroutineData *data = opaque; + + data->entry(); + data->done = true; +} + +static void call_in_coroutine(void (*entry)(void)) +{ + Coroutine *co; + CallInCoroutineData data = { + .entry = entry, + .done = false, + }; + + co = qemu_coroutine_create(call_in_coroutine_entry, &data); + qemu_coroutine_enter(co); + while (!data.done) { + aio_poll(qemu_get_aio_context(), true); + } +} + +enum drain_type { + BDRV_DRAIN_ALL, + BDRV_DRAIN, + BDRV_SUBTREE_DRAIN, + DRAIN_TYPE_MAX, +}; + +static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) +{ + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; + case BDRV_DRAIN: bdrv_drained_begin(bs); break; + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; + default: g_assert_not_reached(); + } +} + +static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) +{ + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; + case BDRV_DRAIN: bdrv_drained_end(bs); break; + case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; + default: g_assert_not_reached(); + } +} + +static void test_drv_cb_common(enum drain_type drain_type, bool recursive) +{ + BlockBackend *blk; + BlockDriverState *bs, *backing; + BDRVTestState *s, *backing_s; + BlockAIOCB *acb; + int aio_ret; + + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = NULL, + .iov_len = 0, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + s = bs->opaque; + 
blk_insert_bs(blk, bs, &error_abort); + + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + backing_s = backing->opaque; + bdrv_set_backing_hd(bs, backing, &error_abort); + + /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */ + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + do_drain_begin(drain_type, bs); + + g_assert_cmpint(s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); + + do_drain_end(drain_type, bs); + + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + /* Now do the same while a request is pending */ + aio_ret = -EINPROGRESS; + acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret); + g_assert(acb != NULL); + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); + + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + do_drain_begin(drain_type, bs); + + g_assert_cmpint(aio_ret, ==, 0); + g_assert_cmpint(s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, !!recursive); + + do_drain_end(drain_type, bs); + + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + bdrv_unref(backing); + bdrv_unref(bs); + blk_unref(blk); +} + +static void test_drv_cb_drain_all(void) +{ + test_drv_cb_common(BDRV_DRAIN_ALL, true); +} + +static void test_drv_cb_drain(void) +{ + test_drv_cb_common(BDRV_DRAIN, false); +} + +static void test_drv_cb_drain_subtree(void) +{ + test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); +} + +static void test_drv_cb_co_drain(void) +{ + call_in_coroutine(test_drv_cb_drain); +} + +static void test_drv_cb_co_drain_subtree(void) +{ + call_in_coroutine(test_drv_cb_drain_subtree); +} + +static void test_quiesce_common(enum drain_type drain_type, bool recursive) +{ + BlockBackend *blk; + BlockDriverState *bs, *backing; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + blk_insert_bs(blk, bs, &error_abort); + + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + bdrv_set_backing_hd(bs, backing, &error_abort); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + + do_drain_begin(drain_type, bs); + + g_assert_cmpint(bs->quiesce_counter, ==, 1); + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); + + do_drain_end(drain_type, bs); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + + bdrv_unref(backing); + bdrv_unref(bs); + blk_unref(blk); +} + +static void test_quiesce_drain_all(void) +{ + // XXX drain_all doesn't quiesce + //test_quiesce_common(BDRV_DRAIN_ALL, true); +} + +static void test_quiesce_drain(void) +{ + test_quiesce_common(BDRV_DRAIN, false); +} + +static void test_quiesce_drain_subtree(void) +{ + test_quiesce_common(BDRV_SUBTREE_DRAIN, true); +} + +static void test_quiesce_co_drain(void) +{ + call_in_coroutine(test_quiesce_drain); +} + +static void test_quiesce_co_drain_subtree(void) +{ + call_in_coroutine(test_quiesce_drain_subtree); +} + +static void test_nested(void) +{ + BlockBackend *blk; + BlockDriverState *bs, *backing; + BDRVTestState *s, *backing_s; + enum drain_type outer, inner; + + blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + s = bs->opaque; + blk_insert_bs(blk, bs, &error_abort); + + backing = 
bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + backing_s = backing->opaque; + bdrv_set_backing_hd(bs, backing, &error_abort); + + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { + /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */ + int bs_quiesce = (outer != BDRV_DRAIN_ALL) + + (inner != BDRV_DRAIN_ALL); + int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) + + (inner == BDRV_SUBTREE_DRAIN); + int backing_cb_cnt = (outer != BDRV_DRAIN) + + (inner != BDRV_DRAIN); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + do_drain_begin(outer, bs); + do_drain_begin(inner, bs); + + g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce); + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); + g_assert_cmpint(s->drain_count, ==, 2); + g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt); + + do_drain_end(inner, bs); + do_drain_end(outer, bs); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + } + } + + bdrv_unref(backing); + bdrv_unref(bs); + blk_unref(blk); +} + +static void test_multiparent(void) +{ + BlockBackend *blk_a, *blk_b; + BlockDriverState *bs_a, *bs_b, *backing; + BDRVTestState *a_s, *b_s, *backing_s; + + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, + &error_abort); + a_s = bs_a->opaque; + blk_insert_bs(blk_a, bs_a, &error_abort); + + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, + &error_abort); + b_s = bs_b->opaque; + blk_insert_bs(blk_b, bs_b, &error_abort); + + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + backing_s = backing->opaque; + bdrv_set_backing_hd(bs_a, backing, &error_abort); + bdrv_set_backing_hd(bs_b, backing, &error_abort); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(a_s->drain_count, ==, 0); + g_assert_cmpint(b_s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); + g_assert_cmpint(backing->quiesce_counter, ==, 1); + g_assert_cmpint(a_s->drain_count, ==, 1); + g_assert_cmpint(b_s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, 1); + + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 2); + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); + g_assert_cmpint(backing->quiesce_counter, ==, 2); + g_assert_cmpint(a_s->drain_count, ==, 2); + g_assert_cmpint(b_s->drain_count, ==, 2); + g_assert_cmpint(backing_s->drain_count, ==, 2); + + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 1); + g_assert_cmpint(bs_b->quiesce_counter, ==, 1); + g_assert_cmpint(backing->quiesce_counter, ==, 1); + g_assert_cmpint(a_s->drain_count, ==, 1); + g_assert_cmpint(b_s->drain_count, ==, 1); + g_assert_cmpint(backing_s->drain_count, ==, 1); + + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); + 
g_assert_cmpint(bs_b->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(a_s->drain_count, ==, 0); + g_assert_cmpint(b_s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + bdrv_unref(backing); + bdrv_unref(bs_a); + bdrv_unref(bs_b); + blk_unref(blk_a); + blk_unref(blk_b); +} + +static void test_graph_change(void) +{ + BlockBackend *blk_a, *blk_b; + BlockDriverState *bs_a, *bs_b, *backing; + BDRVTestState *a_s, *b_s, *backing_s; + + blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, + &error_abort); + a_s = bs_a->opaque; + blk_insert_bs(blk_a, bs_a, &error_abort); + + blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, + &error_abort); + b_s = bs_b->opaque; + blk_insert_bs(blk_b, bs_b, &error_abort); + + backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); + backing_s = backing->opaque; + bdrv_set_backing_hd(bs_a, backing, &error_abort); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(a_s->drain_count, ==, 0); + g_assert_cmpint(b_s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); + do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); + + bdrv_set_backing_hd(bs_b, backing, &error_abort); + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); + g_assert_cmpint(backing->quiesce_counter, ==, 5); + g_assert_cmpint(a_s->drain_count, ==, 5); + g_assert_cmpint(b_s->drain_count, ==, 5); + g_assert_cmpint(backing_s->drain_count, ==, 5); + + bdrv_set_backing_hd(bs_b, NULL, &error_abort); + g_assert_cmpint(bs_a->quiesce_counter, ==, 3); + g_assert_cmpint(bs_b->quiesce_counter, ==, 2); + g_assert_cmpint(backing->quiesce_counter, ==, 3); + g_assert_cmpint(a_s->drain_count, ==, 3); + g_assert_cmpint(b_s->drain_count, ==, 2); + g_assert_cmpint(backing_s->drain_count, ==, 3); + + bdrv_set_backing_hd(bs_b, backing, &error_abort); + g_assert_cmpint(bs_a->quiesce_counter, ==, 5); + g_assert_cmpint(bs_b->quiesce_counter, ==, 5); + g_assert_cmpint(backing->quiesce_counter, ==, 5); + g_assert_cmpint(a_s->drain_count, ==, 5); + g_assert_cmpint(b_s->drain_count, ==, 5); + g_assert_cmpint(backing_s->drain_count, ==, 5); + + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); + do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); + do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); + + g_assert_cmpint(bs_a->quiesce_counter, ==, 0); + g_assert_cmpint(bs_b->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); + g_assert_cmpint(a_s->drain_count, ==, 0); + g_assert_cmpint(b_s->drain_count, ==, 0); + g_assert_cmpint(backing_s->drain_count, ==, 0); + + bdrv_unref(backing); + bdrv_unref(bs_a); + bdrv_unref(bs_b); + blk_unref(blk_a); + blk_unref(blk_b); +} + + +typedef struct TestBlockJob { + BlockJob common; + bool should_complete; +} TestBlockJob; + +static void test_job_completed(BlockJob *job, void *opaque) +{ + block_job_completed(job, 0); +} + +static void coroutine_fn test_job_start(void *opaque) +{ + TestBlockJob *s = opaque; + + while (!s->should_complete) { + 
block_job_sleep_ns(&s->common, 100000); + } + + block_job_defer_to_main_loop(&s->common, test_job_completed, NULL); +} + +static void test_job_complete(BlockJob *job, Error **errp) +{ + TestBlockJob *s = container_of(job, TestBlockJob, common); + s->should_complete = true; +} + +BlockJobDriver test_job_driver = { + .instance_size = sizeof(TestBlockJob), + .start = test_job_start, + .complete = test_job_complete, +}; + +static void test_blockjob_common(enum drain_type drain_type) +{ + BlockBackend *blk_src, *blk_target; + BlockDriverState *src, *target; + BlockJob *job; + int ret; + + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, + &error_abort); + blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk_src, src, &error_abort); + + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, + &error_abort); + blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk_target, target, &error_abort); + + job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0, + 0, NULL, NULL, &error_abort); + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); + block_job_start(job); + + g_assert_cmpint(job->pause_count, ==, 0); + g_assert_false(job->paused); + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ + + do_drain_begin(drain_type, src); + + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ + g_assert_cmpint(job->pause_count, ==, 2); + } else { + g_assert_cmpint(job->pause_count, ==, 1); + } + /* XXX We don't wait until the job is actually paused. Is this okay? */ + /* g_assert_true(job->paused); */ + g_assert_false(job->busy); /* The job is paused */ + + do_drain_end(drain_type, src); + + g_assert_cmpint(job->pause_count, ==, 0); + g_assert_false(job->paused); + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ + + do_drain_begin(drain_type, target); + + if (drain_type == BDRV_DRAIN_ALL) { + /* bdrv_drain_all() drains both src and target */ + g_assert_cmpint(job->pause_count, ==, 2); + } else { + g_assert_cmpint(job->pause_count, ==, 1); + } + /* XXX We don't wait until the job is actually paused. Is this okay? 
*/ + /* g_assert_true(job->paused); */ + g_assert_false(job->busy); /* The job is paused */ + + do_drain_end(drain_type, target); + + g_assert_cmpint(job->pause_count, ==, 0); + g_assert_false(job->paused); + g_assert_false(job->busy); /* We're in block_job_sleep_ns() */ + + ret = block_job_complete_sync(job, &error_abort); + g_assert_cmpint(ret, ==, 0); + + blk_unref(blk_src); + blk_unref(blk_target); + bdrv_unref(src); + bdrv_unref(target); +} + +static void test_blockjob_drain_all(void) +{ + test_blockjob_common(BDRV_DRAIN_ALL); +} + +static void test_blockjob_drain(void) +{ + test_blockjob_common(BDRV_DRAIN); +} + +static void test_blockjob_drain_subtree(void) +{ + test_blockjob_common(BDRV_SUBTREE_DRAIN); +} + +int main(int argc, char **argv) +{ + bdrv_init(); + qemu_init_main_loop(&error_abort); + + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); + g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", + test_drv_cb_drain_subtree); + + // XXX bdrv_drain_all() doesn't work in coroutine context + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); + g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", + test_drv_cb_co_drain_subtree); + + + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); + g_test_add_func("/bdrv-drain/quiesce/drain_subtree", + test_quiesce_drain_subtree); + + // XXX bdrv_drain_all() doesn't work in coroutine context + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); + g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", + test_quiesce_co_drain_subtree); + + g_test_add_func("/bdrv-drain/nested", test_nested); + g_test_add_func("/bdrv-drain/multiparent", test_multiparent); + g_test_add_func("/bdrv-drain/graph-change", test_graph_change); + + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); + g_test_add_func("/bdrv-drain/blockjob/drain_subtree", + test_blockjob_drain_subtree); + + return g_test_run(); +} @@ -3052,9 +3052,8 @@ int main(int argc, char **argv, char **envp) const char *boot_order = NULL; const char *boot_once = NULL; DisplayState *ds; - int cyls, heads, secs, translation; QemuOpts *opts, *machine_opts; - QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL; + QemuOpts *icount_opts = NULL, *accel_opts = NULL; QemuOptsList *olist; int optind; const char *optarg; @@ -3146,8 +3145,6 @@ int main(int argc, char **argv, char **envp) cpu_model = NULL; snapshot = 0; - cyls = heads = secs = 0; - translation = BIOS_ATA_TRANSLATION_AUTO; nb_nics = 0; @@ -3186,7 +3183,7 @@ int main(int argc, char **argv, char **envp) if (optind >= argc) break; if (argv[optind][0] != '-') { - hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); + drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS); } else { const QEMUOption *popt; @@ -3206,21 +3203,6 @@ int main(int argc, char **argv, char **envp) cpu_model = optarg; break; case QEMU_OPTION_hda: - { - char buf[256]; - if (cyls == 0) - snprintf(buf, sizeof(buf), "%s", HD_OPTS); - else - snprintf(buf, sizeof(buf), - "%s,cyls=%d,heads=%d,secs=%d%s", - HD_OPTS , cyls, heads, secs, - translation == BIOS_ATA_TRANSLATION_LBA ? - ",trans=lba" : - translation == BIOS_ATA_TRANSLATION_NONE ? 
- ",trans=none" : ""); - drive_add(IF_DEFAULT, 0, optarg, buf); - break; - } case QEMU_OPTION_hdb: case QEMU_OPTION_hdc: case QEMU_OPTION_hdd: @@ -3271,70 +3253,6 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_snapshot: snapshot = 1; break; - case QEMU_OPTION_hdachs: - { - const char *p; - p = optarg; - cyls = strtol(p, (char **)&p, 0); - if (cyls < 1 || cyls > 16383) - goto chs_fail; - if (*p != ',') - goto chs_fail; - p++; - heads = strtol(p, (char **)&p, 0); - if (heads < 1 || heads > 16) - goto chs_fail; - if (*p != ',') - goto chs_fail; - p++; - secs = strtol(p, (char **)&p, 0); - if (secs < 1 || secs > 63) - goto chs_fail; - if (*p == ',') { - p++; - if (!strcmp(p, "large")) { - translation = BIOS_ATA_TRANSLATION_LARGE; - } else if (!strcmp(p, "rechs")) { - translation = BIOS_ATA_TRANSLATION_RECHS; - } else if (!strcmp(p, "none")) { - translation = BIOS_ATA_TRANSLATION_NONE; - } else if (!strcmp(p, "lba")) { - translation = BIOS_ATA_TRANSLATION_LBA; - } else if (!strcmp(p, "auto")) { - translation = BIOS_ATA_TRANSLATION_AUTO; - } else { - goto chs_fail; - } - } else if (*p != '\0') { - chs_fail: - error_report("invalid physical CHS format"); - exit(1); - } - if (hda_opts != NULL) { - qemu_opt_set_number(hda_opts, "cyls", cyls, - &error_abort); - qemu_opt_set_number(hda_opts, "heads", heads, - &error_abort); - qemu_opt_set_number(hda_opts, "secs", secs, - &error_abort); - if (translation == BIOS_ATA_TRANSLATION_LARGE) { - qemu_opt_set(hda_opts, "trans", "large", - &error_abort); - } else if (translation == BIOS_ATA_TRANSLATION_RECHS) { - qemu_opt_set(hda_opts, "trans", "rechs", - &error_abort); - } else if (translation == BIOS_ATA_TRANSLATION_LBA) { - qemu_opt_set(hda_opts, "trans", "lba", - &error_abort); - } else if (translation == BIOS_ATA_TRANSLATION_NONE) { - qemu_opt_set(hda_opts, "trans", "none", - &error_abort); - } - } - } - error_report("'-hdachs' is deprecated, please use '-device" - " ide-hd,cyls=c,heads=h,secs=s,...' instead"); - break; case QEMU_OPTION_numa: opts = qemu_opts_parse_noisily(qemu_find_opts("numa"), optarg, true); |