aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c75
-rw-r--r--block/commit.c8
-rw-r--r--block/io.c164
-rw-r--r--block/qcow2.c51
-rw-r--r--block/qcow2.h3
-rw-r--r--block/replication.c6
-rw-r--r--blockdev.c11
-rw-r--r--blockjob.c22
-rw-r--r--hmp.c6
-rw-r--r--hw/block/nvme.c349
-rw-r--r--hw/block/trace-events93
-rw-r--r--include/block/block.h15
-rw-r--r--include/block/block_int.h6
-rw-r--r--qapi/block-core.json4
-rw-r--r--qemu-doc.texi29
-rw-r--r--qemu-io-cmds.c3
-rw-r--r--qemu-options.hx19
-rw-r--r--tests/Makefile.include2
-rwxr-xr-xtests/qemu-iotests/1974
-rw-r--r--tests/qemu-iotests/common.filter3
-rw-r--r--tests/test-bdrv-drain.c651
-rw-r--r--vl.c86
22 files changed, 1294 insertions, 316 deletions
diff --git a/block.c b/block.c
index 1c37ce4554..a8da4f2b25 100644
--- a/block.c
+++ b/block.c
@@ -822,6 +822,18 @@ static void bdrv_child_cb_drained_end(BdrvChild *child)
bdrv_drained_end(bs);
}
+static void bdrv_child_cb_attach(BdrvChild *child)
+{
+ BlockDriverState *bs = child->opaque;
+ bdrv_apply_subtree_drain(child, bs);
+}
+
+static void bdrv_child_cb_detach(BdrvChild *child)
+{
+ BlockDriverState *bs = child->opaque;
+ bdrv_unapply_subtree_drain(child, bs);
+}
+
static int bdrv_child_cb_inactivate(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
@@ -889,6 +901,8 @@ const BdrvChildRole child_file = {
.inherit_options = bdrv_inherited_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
+ .attach = bdrv_child_cb_attach,
+ .detach = bdrv_child_cb_detach,
.inactivate = bdrv_child_cb_inactivate,
};
@@ -911,6 +925,8 @@ const BdrvChildRole child_format = {
.inherit_options = bdrv_inherited_fmt_options,
.drained_begin = bdrv_child_cb_drained_begin,
.drained_end = bdrv_child_cb_drained_end,
+ .attach = bdrv_child_cb_attach,
+ .detach = bdrv_child_cb_detach,
.inactivate = bdrv_child_cb_inactivate,
};
@@ -953,6 +969,8 @@ static void bdrv_backing_attach(BdrvChild *c)
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
parent->backing_blocker);
+
+ bdrv_child_cb_attach(c);
}
static void bdrv_backing_detach(BdrvChild *c)
@@ -963,6 +981,8 @@ static void bdrv_backing_detach(BdrvChild *c)
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
error_free(parent->backing_blocker);
parent->backing_blocker = NULL;
+
+ bdrv_child_cb_detach(c);
}
/*
@@ -1924,6 +1944,8 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
assert(role == &child_backing || role == &child_file);
if (!backing) {
+ int flags = bdrv_reopen_get_flags(reopen_queue, bs);
+
/* Apart from the modifications below, the same permissions are
* forwarded and left alone as for filters */
bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared,
@@ -1936,7 +1958,9 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
/* bs->file always needs to be consistent because of the metadata. We
* can never allow other users to resize or write to it. */
- perm |= BLK_PERM_CONSISTENT_READ;
+ if (!(flags & BDRV_O_NO_IO)) {
+ perm |= BLK_PERM_CONSISTENT_READ;
+ }
shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
} else {
/* We want consistent read from backing files if the parent needs it.
@@ -1968,17 +1992,23 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
{
BlockDriverState *old_bs = child->bs;
+ int i;
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
if (old_bs) {
- if (old_bs->quiesce_counter && child->role->drained_end) {
- child->role->drained_end(child);
- }
+ /* Detach first so that the recursive drain sections coming from @child
+ * are already gone and we only end the drain sections that came from
+ * elsewhere. */
if (child->role->detach) {
child->role->detach(child);
}
+ if (old_bs->quiesce_counter && child->role->drained_end) {
+ for (i = 0; i < old_bs->quiesce_counter; i++) {
+ child->role->drained_end(child);
+ }
+ }
QLIST_REMOVE(child, next_parent);
}
@@ -1987,9 +2017,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
if (new_bs) {
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
if (new_bs->quiesce_counter && child->role->drained_begin) {
- child->role->drained_begin(child);
+ for (i = 0; i < new_bs->quiesce_counter; i++) {
+ child->role->drained_begin(child);
+ }
}
+ /* Attach only after starting new drained sections, so that recursive
+ * drain sections coming from @child don't get an extra .drained_begin
+ * callback. */
if (child->role->attach) {
child->role->attach(child);
}
@@ -2731,6 +2766,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference,
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
+ * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
*/
static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
@@ -2746,6 +2782,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
BdrvChild *child;
QDict *old_options, *explicit_options;
+ /* Make sure that the caller remembered to use a drained section. This is
+ * important to avoid graph changes between the recursive queuing here and
+ * bdrv_reopen_multiple(). */
+ assert(bs->quiesce_counter > 0);
+
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
QSIMPLEQ_INIT(bs_queue);
@@ -2870,6 +2911,8 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
* If all devices prepare successfully, then the changes are committed
* to all devices.
*
+ * All affected nodes must be drained between bdrv_reopen_queue() and
+ * bdrv_reopen_multiple().
*/
int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp)
{
@@ -2879,11 +2922,8 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er
assert(bs_queue != NULL);
- aio_context_release(ctx);
- bdrv_drain_all_begin();
- aio_context_acquire(ctx);
-
QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ assert(bs_entry->state.bs->quiesce_counter > 0);
if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
error_propagate(errp, local_err);
goto cleanup;
@@ -2912,8 +2952,6 @@ cleanup:
}
g_free(bs_queue);
- bdrv_drain_all_end();
-
return ret;
}
@@ -2923,12 +2961,18 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
int ret = -1;
Error *local_err = NULL;
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
+ BlockReopenQueue *queue;
+ bdrv_subtree_drained_begin(bs);
+
+ queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
}
+
+ bdrv_subtree_drained_end(bs);
+
return ret;
}
@@ -4601,10 +4645,11 @@ void bdrv_img_create(const char *filename, const char *fmt,
back_flags = flags;
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+ backing_options = qdict_new();
if (backing_fmt) {
- backing_options = qdict_new();
qdict_put_str(backing_options, "driver", backing_fmt);
}
+ qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
&local_err);
@@ -4754,7 +4799,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
AioContext *ctx = bdrv_get_aio_context(bs);
aio_disable_external(ctx);
- bdrv_parent_drained_begin(bs);
+ bdrv_parent_drained_begin(bs, NULL);
bdrv_drain(bs); /* ensure there are no in-flight requests */
while (aio_poll(ctx, false)) {
@@ -4768,7 +4813,7 @@ void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
*/
aio_context_acquire(new_context);
bdrv_attach_aio_context(bs, new_context);
- bdrv_parent_drained_end(bs);
+ bdrv_parent_drained_end(bs, NULL);
aio_enable_external(ctx);
aio_context_release(new_context);
}
diff --git a/block/commit.c b/block/commit.c
index c5327551ce..bb6c904704 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -277,7 +277,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
const char *filter_node_name, Error **errp)
{
CommitBlockJob *s;
- BlockReopenQueue *reopen_queue = NULL;
int orig_base_flags;
BlockDriverState *iter;
BlockDriverState *commit_top_bs = NULL;
@@ -299,12 +298,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
/* convert base to r/w, if necessary */
orig_base_flags = bdrv_get_flags(base);
if (!(orig_base_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
- orig_base_flags | BDRV_O_RDWR);
- }
-
- if (reopen_queue) {
- bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err);
+ bdrv_reopen(base, orig_base_flags | BDRV_O_RDWR, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
goto fail;
diff --git a/block/io.c b/block/io.c
index 6773926fc1..7ea402352e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -40,22 +40,28 @@
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
-void bdrv_parent_drained_begin(BlockDriverState *bs)
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+ if (c == ignore) {
+ continue;
+ }
if (c->role->drained_begin) {
c->role->drained_begin(c);
}
}
}
-void bdrv_parent_drained_end(BlockDriverState *bs)
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+ if (c == ignore) {
+ continue;
+ }
if (c->role->drained_end) {
c->role->drained_end(c);
}
@@ -134,29 +140,13 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
assert(old >= 1);
}
-/* Check if any requests are in-flight (including throttled requests) */
-bool bdrv_requests_pending(BlockDriverState *bs)
-{
- BdrvChild *child;
-
- if (atomic_read(&bs->in_flight)) {
- return true;
- }
-
- QLIST_FOREACH(child, &bs->children, next) {
- if (bdrv_requests_pending(child->bs)) {
- return true;
- }
- }
-
- return false;
-}
-
typedef struct {
Coroutine *co;
BlockDriverState *bs;
bool done;
bool begin;
+ bool recursive;
+ BdrvChild *parent;
} BdrvCoDrainData;
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
@@ -175,8 +165,10 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
bdrv_wakeup(bs);
}
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
+/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, bool recursive)
{
+ BdrvChild *child, *tmp;
BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
@@ -187,16 +179,19 @@ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
bdrv_coroutine_enter(bs, data.co);
BDRV_POLL_WHILE(bs, !data.done);
+
+ if (recursive) {
+ QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
+ bdrv_drain_invoke(child->bs, begin, true);
+ }
+ }
}
-static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
+static bool bdrv_drain_recurse(BlockDriverState *bs)
{
BdrvChild *child, *tmp;
bool waited;
- /* Ensure any pending metadata writes are submitted to bs->file. */
- bdrv_drain_invoke(bs, begin);
-
/* Wait for drained requests to finish */
waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
@@ -215,7 +210,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
*/
bdrv_ref(bs);
}
- waited |= bdrv_drain_recurse(bs, begin);
+ waited |= bdrv_drain_recurse(bs);
if (in_main_loop) {
bdrv_unref(bs);
}
@@ -224,6 +219,11 @@ static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
return waited;
}
+static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+ BdrvChild *parent);
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+ BdrvChild *parent);
+
static void bdrv_co_drain_bh_cb(void *opaque)
{
BdrvCoDrainData *data = opaque;
@@ -232,9 +232,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_drained_begin(bs);
+ bdrv_do_drained_begin(bs, data->recursive, data->parent);
} else {
- bdrv_drained_end(bs);
+ bdrv_do_drained_end(bs, data->recursive, data->parent);
}
data->done = true;
@@ -242,7 +242,8 @@ static void bdrv_co_drain_bh_cb(void *opaque)
}
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
- bool begin)
+ bool begin, bool recursive,
+ BdrvChild *parent)
{
BdrvCoDrainData data;
@@ -256,6 +257,8 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
.bs = bs,
.done = false,
.begin = begin,
+ .recursive = recursive,
+ .parent = parent,
};
bdrv_inc_in_flight(bs);
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
@@ -267,35 +270,97 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
assert(data.done);
}
-void bdrv_drained_begin(BlockDriverState *bs)
+void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+ BdrvChild *parent)
{
+ BdrvChild *child, *next;
+
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true);
+ bdrv_co_yield_to_drain(bs, true, recursive, parent);
return;
}
+ /* Stop things in parent-to-child order */
if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
- bdrv_parent_drained_begin(bs);
}
- bdrv_drain_recurse(bs, true);
+ bdrv_parent_drained_begin(bs, parent);
+ bdrv_drain_invoke(bs, true, false);
+ bdrv_drain_recurse(bs);
+
+ if (recursive) {
+ bs->recursive_quiesce_counter++;
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+ bdrv_do_drained_begin(child->bs, true, child);
+ }
+ }
}
-void bdrv_drained_end(BlockDriverState *bs)
+void bdrv_drained_begin(BlockDriverState *bs)
{
+ bdrv_do_drained_begin(bs, false, NULL);
+}
+
+void bdrv_subtree_drained_begin(BlockDriverState *bs)
+{
+ bdrv_do_drained_begin(bs, true, NULL);
+}
+
+void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+ BdrvChild *parent)
+{
+ BdrvChild *child, *next;
+ int old_quiesce_counter;
+
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false);
+ bdrv_co_yield_to_drain(bs, false, recursive, parent);
return;
}
assert(bs->quiesce_counter > 0);
- if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
- return;
+ old_quiesce_counter = atomic_fetch_dec(&bs->quiesce_counter);
+
+ /* Re-enable things in child-to-parent order */
+ bdrv_drain_invoke(bs, false, false);
+ bdrv_parent_drained_end(bs, parent);
+ if (old_quiesce_counter == 1) {
+ aio_enable_external(bdrv_get_aio_context(bs));
}
- bdrv_parent_drained_end(bs);
- bdrv_drain_recurse(bs, false);
- aio_enable_external(bdrv_get_aio_context(bs));
+ if (recursive) {
+ bs->recursive_quiesce_counter--;
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+ bdrv_do_drained_end(child->bs, true, child);
+ }
+ }
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ bdrv_do_drained_end(bs, false, NULL);
+}
+
+void bdrv_subtree_drained_end(BlockDriverState *bs)
+{
+ bdrv_do_drained_end(bs, true, NULL);
+}
+
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
+{
+ int i;
+
+ for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
+ bdrv_do_drained_begin(child->bs, true, child);
+ }
+}
+
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
+{
+ int i;
+
+ for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
+ bdrv_do_drained_end(child->bs, true, child);
+ }
}
/*
@@ -342,14 +407,20 @@ void bdrv_drain_all_begin(void)
BdrvNextIterator it;
GSList *aio_ctxs = NULL, *ctx;
- block_job_pause_all();
+ /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
+ * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
+ * nodes in several different AioContexts, so make sure we're in the main
+ * context. */
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
+ /* Stop things in parent-to-child order */
aio_context_acquire(aio_context);
- bdrv_parent_drained_begin(bs);
aio_disable_external(aio_context);
+ bdrv_parent_drained_begin(bs, NULL);
+ bdrv_drain_invoke(bs, true, true);
aio_context_release(aio_context);
if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -372,7 +443,7 @@ void bdrv_drain_all_begin(void)
aio_context_acquire(aio_context);
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
if (aio_context == bdrv_get_aio_context(bs)) {
- waited |= bdrv_drain_recurse(bs, true);
+ waited |= bdrv_drain_recurse(bs);
}
}
aio_context_release(aio_context);
@@ -390,14 +461,13 @@ void bdrv_drain_all_end(void)
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
+ /* Re-enable things in child-to-parent order */
aio_context_acquire(aio_context);
+ bdrv_drain_invoke(bs, false, true);
+ bdrv_parent_drained_end(bs, NULL);
aio_enable_external(aio_context);
- bdrv_parent_drained_end(bs);
- bdrv_drain_recurse(bs, false);
aio_context_release(aio_context);
}
-
- block_job_resume_all();
}
void bdrv_drain_all(void)
diff --git a/block/qcow2.c b/block/qcow2.c
index 1914a940e5..4348b2c0c5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1672,34 +1672,12 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
return status;
}
-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t offset, int bytes)
-{
- uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- int n1;
-
- if ((offset + bytes) <= bs_size) {
- return bytes;
- }
-
- if (offset >= bs_size) {
- n1 = 0;
- } else {
- n1 = bs_size - offset;
- }
-
- qemu_iovec_memset(qiov, n1, 0, bytes - n1);
-
- return n1;
-}
-
static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov,
int flags)
{
BDRVQcow2State *s = bs->opaque;
- int offset_in_cluster, n1;
+ int offset_in_cluster;
int ret;
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
@@ -1734,26 +1712,13 @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
case QCOW2_CLUSTER_UNALLOCATED:
if (bs->backing) {
- /* read from the base image */
- n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
- offset, cur_bytes);
- if (n1 > 0) {
- QEMUIOVector local_qiov;
-
- qemu_iovec_init(&local_qiov, hd_qiov.niov);
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_preadv(bs->backing, offset, n1,
- &local_qiov, 0);
- qemu_co_mutex_lock(&s->lock);
-
- qemu_iovec_destroy(&local_qiov);
-
- if (ret < 0) {
- goto fail;
- }
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
+ &hd_qiov, 0);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
}
} else {
/* Note: in this case, no need to wait */
diff --git a/block/qcow2.h b/block/qcow2.h
index 6f0ff15dd0..46c8cf44ec 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -528,9 +528,6 @@ uint32_t offset_to_reftable_index(BDRVQcow2State *s, uint64_t offset)
}
/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors);
-
int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
int refcount_order, bool generous_increase,
uint64_t *refblock_count);
diff --git a/block/replication.c b/block/replication.c
index e41e293d2b..b1ea3caa4b 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -394,6 +394,9 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
new_secondary_flags = s->orig_secondary_flags;
}
+ bdrv_subtree_drained_begin(s->hidden_disk->bs);
+ bdrv_subtree_drained_begin(s->secondary_disk->bs);
+
if (orig_hidden_flags != new_hidden_flags) {
reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL,
new_hidden_flags);
@@ -409,6 +412,9 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
reopen_queue, &local_err);
error_propagate(errp, local_err);
}
+
+ bdrv_subtree_drained_end(s->hidden_disk->bs);
+ bdrv_subtree_drained_end(s->secondary_disk->bs);
}
static void backup_job_cleanup(BlockDriverState *bs)
diff --git a/blockdev.c b/blockdev.c
index 9c3a430cfb..29d569a24e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -735,10 +735,6 @@ QemuOptsList qemu_legacy_drive_opts = {
.type = QEMU_OPT_STRING,
.help = "chs translation (auto, lba, none)",
},{
- .name = "boot",
- .type = QEMU_OPT_BOOL,
- .help = "(deprecated, ignored)",
- },{
.name = "addr",
.type = QEMU_OPT_STRING,
.help = "pci address (virtio only)",
@@ -873,13 +869,6 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type)
goto fail;
}
- /* Deprecated option boot=[on|off] */
- if (qemu_opt_get(legacy_opts, "boot") != NULL) {
- fprintf(stderr, "qemu-kvm: boot=on|off is deprecated and will be "
- "ignored. Future versions will reject this parameter. Please "
- "update your scripts.\n");
- }
-
/* Other deprecated options */
if (!qtest_enabled()) {
for (i = 0; i < ARRAY_SIZE(deprecated); i++) {
diff --git a/blockjob.c b/blockjob.c
index 6173e4728c..f5cea84e73 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -234,26 +234,23 @@ static char *child_job_get_parent_desc(BdrvChild *c)
job->id);
}
-static const BdrvChildRole child_job = {
- .get_parent_desc = child_job_get_parent_desc,
- .stay_at_node = true,
-};
-
-static void block_job_drained_begin(void *opaque)
+static void child_job_drained_begin(BdrvChild *c)
{
- BlockJob *job = opaque;
+ BlockJob *job = c->opaque;
block_job_pause(job);
}
-static void block_job_drained_end(void *opaque)
+static void child_job_drained_end(BdrvChild *c)
{
- BlockJob *job = opaque;
+ BlockJob *job = c->opaque;
block_job_resume(job);
}
-static const BlockDevOps block_job_dev_ops = {
- .drained_begin = block_job_drained_begin,
- .drained_end = block_job_drained_end,
+static const BdrvChildRole child_job = {
+ .get_parent_desc = child_job_get_parent_desc,
+ .drained_begin = child_job_drained_begin,
+ .drained_end = child_job_drained_end,
+ .stay_at_node = true,
};
void block_job_remove_all_bdrv(BlockJob *job)
@@ -715,7 +712,6 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
bs->job = job;
- blk_set_dev_ops(blk, &block_job_dev_ops, job);
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
QLIST_INSERT_HEAD(&block_jobs, job, job_list);
diff --git a/hmp.c b/hmp.c
index 35a7041824..2d72f94193 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2318,7 +2318,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
{
BlockBackend *blk;
BlockBackend *local_blk = NULL;
- AioContext *aio_context;
const char* device = qdict_get_str(qdict, "device");
const char* command = qdict_get_str(qdict, "command");
Error *err = NULL;
@@ -2338,9 +2337,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
}
}
- aio_context = blk_get_aio_context(blk);
- aio_context_acquire(aio_context);
-
/*
* Notably absent: Proper permission management. This is sad, but it seems
* almost impossible to achieve without changing the semantics and thereby
@@ -2368,8 +2364,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
*/
qemuio_command(blk, command);
- aio_context_release(aio_context);
-
fail:
blk_unref(local_blk);
hmp_handle_error(mon, &err);
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e529e88e4e..1ac356d3a5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -34,8 +34,17 @@
#include "qapi/visitor.h"
#include "sysemu/block-backend.h"
+#include "qemu/log.h"
+#include "trace.h"
#include "nvme.h"
+#define NVME_GUEST_ERR(trace, fmt, ...) \
+ do { \
+ (trace_##trace)(__VA_ARGS__); \
+ qemu_log_mask(LOG_GUEST_ERROR, #trace \
+ " in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+
static void nvme_process_sq(void *opaque);
static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
@@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
{
if (cq->irq_enabled) {
if (msix_enabled(&(n->parent_obj))) {
+ trace_nvme_irq_msix(cq->vector);
msix_notify(&(n->parent_obj), cq->vector);
} else {
+ trace_nvme_irq_pin();
pci_irq_pulse(&n->parent_obj);
}
+ } else {
+ trace_nvme_irq_masked();
}
}
@@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
trans_len = MIN(len, trans_len);
int num_prps = (len >> n->page_bits) + 1;
- if (!prp1) {
+ if (unlikely(!prp1)) {
+ trace_nvme_err_invalid_prp();
return NVME_INVALID_FIELD | NVME_DNR;
} else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
@@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
}
len -= trans_len;
if (len) {
- if (!prp2) {
+ if (unlikely(!prp2)) {
+ trace_nvme_err_invalid_prp2_missing();
goto unmap;
}
if (len > n->page_size) {
@@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
uint64_t prp_ent = le64_to_cpu(prp_list[i]);
if (i == n->max_prp_ents - 1 && len > n->page_size) {
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
goto unmap;
}
@@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
prp_ent = le64_to_cpu(prp_list[i]);
}
- if (!prp_ent || prp_ent & (n->page_size - 1)) {
+ if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+ trace_nvme_err_invalid_prplist_ent(prp_ent);
goto unmap;
}
@@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
i++;
}
} else {
- if (prp2 & (n->page_size - 1)) {
+ if (unlikely(prp2 & (n->page_size - 1))) {
+ trace_nvme_err_invalid_prp2_align(prp2);
goto unmap;
}
if (qsg->nsg) {
@@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
QEMUIOVector iov;
uint16_t status = NVME_SUCCESS;
+ trace_nvme_dma_read(prp1, prp2);
+
if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
if (qsg.nsg > 0) {
- if (dma_buf_read(ptr, len, &qsg)) {
+ if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+ trace_nvme_err_invalid_dma();
status = NVME_INVALID_FIELD | NVME_DNR;
}
qemu_sglist_destroy(&qsg);
} else {
- if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
+ if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
+ trace_nvme_err_invalid_dma();
status = NVME_INVALID_FIELD | NVME_DNR;
}
qemu_iovec_destroy(&iov);
@@ -273,7 +295,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);
- if (slba + nlb > ns->id_ns.nsze) {
+ if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
return NVME_LBA_RANGE | NVME_DNR;
}
@@ -301,8 +324,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
- if ((slba + nlb) > ns->id_ns.nsze) {
+ trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
+
+ if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+ trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
return NVME_LBA_RANGE | NVME_DNR;
}
@@ -336,7 +362,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
NvmeNamespace *ns;
uint32_t nsid = le32_to_cpu(cmd->nsid);
- if (nsid == 0 || nsid > n->num_namespaces) {
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -350,6 +377,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
case NVME_CMD_READ:
return nvme_rw(n, ns, cmd, req);
default:
+ trace_nvme_err_invalid_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
}
@@ -373,10 +401,13 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
NvmeCQueue *cq;
uint16_t qid = le16_to_cpu(c->qid);
- if (!qid || nvme_check_sqid(n, qid)) {
+ if (unlikely(!qid || nvme_check_sqid(n, qid))) {
+ trace_nvme_err_invalid_del_sq(qid);
return NVME_INVALID_QID | NVME_DNR;
}
+ trace_nvme_del_sq(qid);
+
sq = n->sq[qid];
while (!QTAILQ_EMPTY(&sq->out_req_list)) {
req = QTAILQ_FIRST(&sq->out_req_list);
@@ -439,19 +470,26 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
uint16_t qflags = le16_to_cpu(c->sq_flags);
uint64_t prp1 = le64_to_cpu(c->prp1);
- if (!cqid || nvme_check_cqid(n, cqid)) {
+ trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags);
+
+ if (unlikely(!cqid || nvme_check_cqid(n, cqid))) {
+ trace_nvme_err_invalid_create_sq_cqid(cqid);
return NVME_INVALID_CQID | NVME_DNR;
}
- if (!sqid || !nvme_check_sqid(n, sqid)) {
+ if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) {
+ trace_nvme_err_invalid_create_sq_sqid(sqid);
return NVME_INVALID_QID | NVME_DNR;
}
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+ trace_nvme_err_invalid_create_sq_size(qsize);
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
}
- if (!prp1 || prp1 & (n->page_size - 1)) {
+ if (unlikely(!prp1 || prp1 & (n->page_size - 1))) {
+ trace_nvme_err_invalid_create_sq_addr(prp1);
return NVME_INVALID_FIELD | NVME_DNR;
}
- if (!(NVME_SQ_FLAGS_PC(qflags))) {
+ if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) {
+ trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags));
return NVME_INVALID_FIELD | NVME_DNR;
}
sq = g_malloc0(sizeof(*sq));
@@ -476,14 +514,17 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
NvmeCQueue *cq;
uint16_t qid = le16_to_cpu(c->qid);
- if (!qid || nvme_check_cqid(n, qid)) {
+ if (unlikely(!qid || nvme_check_cqid(n, qid))) {
+ trace_nvme_err_invalid_del_cq_cqid(qid);
return NVME_INVALID_CQID | NVME_DNR;
}
cq = n->cq[qid];
- if (!QTAILQ_EMPTY(&cq->sq_list)) {
+ if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) {
+ trace_nvme_err_invalid_del_cq_notempty(qid);
return NVME_INVALID_QUEUE_DEL;
}
+ trace_nvme_del_cq(qid);
nvme_free_cq(cq, n);
return NVME_SUCCESS;
}
@@ -516,19 +557,27 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
uint16_t qflags = le16_to_cpu(c->cq_flags);
uint64_t prp1 = le64_to_cpu(c->prp1);
- if (!cqid || !nvme_check_cqid(n, cqid)) {
+ trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags,
+ NVME_CQ_FLAGS_IEN(qflags) != 0);
+
+ if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) {
+ trace_nvme_err_invalid_create_cq_cqid(cqid);
return NVME_INVALID_CQID | NVME_DNR;
}
- if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
+ if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) {
+ trace_nvme_err_invalid_create_cq_size(qsize);
return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
}
- if (!prp1) {
+ if (unlikely(!prp1)) {
+ trace_nvme_err_invalid_create_cq_addr(prp1);
return NVME_INVALID_FIELD | NVME_DNR;
}
- if (vector > n->num_queues) {
+ if (unlikely(vector > n->num_queues)) {
+ trace_nvme_err_invalid_create_cq_vector(vector);
return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
}
- if (!(NVME_CQ_FLAGS_PC(qflags))) {
+ if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) {
+ trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags));
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -543,6 +592,8 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
+ trace_nvme_identify_ctrl();
+
return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
prp1, prp2);
}
@@ -554,11 +605,15 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
uint64_t prp1 = le64_to_cpu(c->prp1);
uint64_t prp2 = le64_to_cpu(c->prp2);
- if (nsid == 0 || nsid > n->num_namespaces) {
+ trace_nvme_identify_ns(nsid);
+
+ if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+ trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
return NVME_INVALID_NSID | NVME_DNR;
}
ns = &n->namespaces[nsid - 1];
+
return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
prp1, prp2);
}
@@ -573,6 +628,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
uint16_t ret;
int i, j = 0;
+ trace_nvme_identify_nslist(min_nsid);
+
list = g_malloc0(data_len);
for (i = 0; i < n->num_namespaces; i++) {
if (i < min_nsid) {
@@ -601,6 +658,7 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
case 0x02:
return nvme_identify_nslist(n, c);
default:
+ trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
return NVME_INVALID_FIELD | NVME_DNR;
}
}
@@ -613,11 +671,14 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
switch (dw10) {
case NVME_VOLATILE_WRITE_CACHE:
result = blk_enable_write_cache(n->conf.blk);
+ trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
break;
case NVME_NUMBER_OF_QUEUES:
result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
+ trace_nvme_getfeat_numq(result);
break;
default:
+ trace_nvme_err_invalid_getfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
}
@@ -635,10 +696,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
break;
case NVME_NUMBER_OF_QUEUES:
+ trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
+ ((dw11 >> 16) & 0xFFFF) + 1,
+ n->num_queues - 1, n->num_queues - 1);
req->cqe.result =
cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16));
break;
default:
+ trace_nvme_err_invalid_setfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
}
return NVME_SUCCESS;
@@ -662,6 +727,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
default:
+ trace_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
}
@@ -721,15 +787,78 @@ static int nvme_start_ctrl(NvmeCtrl *n)
uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
uint32_t page_size = 1 << page_bits;
- if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
- n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
- NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
- NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
- NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
- NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
- NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
- NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
- !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
+ if (unlikely(n->cq[0])) {
+ trace_nvme_err_startfail_cq();
+ return -1;
+ }
+ if (unlikely(n->sq[0])) {
+ trace_nvme_err_startfail_sq();
+ return -1;
+ }
+ if (unlikely(!n->bar.asq)) {
+ trace_nvme_err_startfail_nbarasq();
+ return -1;
+ }
+ if (unlikely(!n->bar.acq)) {
+ trace_nvme_err_startfail_nbaracq();
+ return -1;
+ }
+ if (unlikely(n->bar.asq & (page_size - 1))) {
+ trace_nvme_err_startfail_asq_misaligned(n->bar.asq);
+ return -1;
+ }
+ if (unlikely(n->bar.acq & (page_size - 1))) {
+ trace_nvme_err_startfail_acq_misaligned(n->bar.acq);
+ return -1;
+ }
+ if (unlikely(NVME_CC_MPS(n->bar.cc) <
+ NVME_CAP_MPSMIN(n->bar.cap))) {
+ trace_nvme_err_startfail_page_too_small(
+ NVME_CC_MPS(n->bar.cc),
+ NVME_CAP_MPSMIN(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(NVME_CC_MPS(n->bar.cc) >
+ NVME_CAP_MPSMAX(n->bar.cap))) {
+ trace_nvme_err_startfail_page_too_large(
+ NVME_CC_MPS(n->bar.cc),
+ NVME_CAP_MPSMAX(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) <
+ NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) {
+ trace_nvme_err_startfail_cqent_too_small(
+ NVME_CC_IOCQES(n->bar.cc),
+ NVME_CTRL_CQES_MIN(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(NVME_CC_IOCQES(n->bar.cc) >
+ NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) {
+ trace_nvme_err_startfail_cqent_too_large(
+ NVME_CC_IOCQES(n->bar.cc),
+ NVME_CTRL_CQES_MAX(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) <
+ NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) {
+ trace_nvme_err_startfail_sqent_too_small(
+ NVME_CC_IOSQES(n->bar.cc),
+ NVME_CTRL_SQES_MIN(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(NVME_CC_IOSQES(n->bar.cc) >
+ NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) {
+ trace_nvme_err_startfail_sqent_too_large(
+ NVME_CC_IOSQES(n->bar.cc),
+ NVME_CTRL_SQES_MAX(n->bar.cap));
+ return -1;
+ }
+ if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) {
+ trace_nvme_err_startfail_asqent_sz_zero();
+ return -1;
+ }
+ if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) {
+ trace_nvme_err_startfail_acqent_sz_zero();
return -1;
}
@@ -749,16 +878,48 @@ static int nvme_start_ctrl(NvmeCtrl *n)
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
unsigned size)
{
+ if (unlikely(offset & (sizeof(uint32_t) - 1))) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32,
+ "MMIO write not 32-bit aligned,"
+ " offset=0x%"PRIx64"", offset);
+ /* should be ignored, fall through for now */
+ }
+
+ if (unlikely(size < sizeof(uint32_t))) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall,
+ "MMIO write smaller than 32-bits,"
+ " offset=0x%"PRIx64", size=%u",
+ offset, size);
+ /* should be ignored, fall through for now */
+ }
+
switch (offset) {
- case 0xc:
+ case 0xc: /* INTMS */
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
+ "undefined access to interrupt mask set"
+ " when MSI-X is enabled");
+ /* should be ignored, fall through for now */
+ }
n->bar.intms |= data & 0xffffffff;
n->bar.intmc = n->bar.intms;
+ trace_nvme_mmio_intm_set(data & 0xffffffff,
+ n->bar.intmc);
break;
- case 0x10:
+ case 0x10: /* INTMC */
+ if (unlikely(msix_enabled(&(n->parent_obj)))) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix,
+ "undefined access to interrupt mask clr"
+ " when MSI-X is enabled");
+ /* should be ignored, fall through for now */
+ }
n->bar.intms &= ~(data & 0xffffffff);
n->bar.intmc = n->bar.intms;
+ trace_nvme_mmio_intm_clr(data & 0xffffffff,
+ n->bar.intmc);
break;
- case 0x14:
+ case 0x14: /* CC */
+ trace_nvme_mmio_cfg(data & 0xffffffff);
/* Windows first sends data, then sends enable bit */
if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
!NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc))
@@ -768,40 +929,82 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
n->bar.cc = data;
- if (nvme_start_ctrl(n)) {
+ if (unlikely(nvme_start_ctrl(n))) {
+ trace_nvme_err_startfail();
n->bar.csts = NVME_CSTS_FAILED;
} else {
+ trace_nvme_mmio_start_success();
n->bar.csts = NVME_CSTS_READY;
}
} else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
+ trace_nvme_mmio_stopped();
nvme_clear_ctrl(n);
n->bar.csts &= ~NVME_CSTS_READY;
}
if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
- nvme_clear_ctrl(n);
- n->bar.cc = data;
- n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
+ trace_nvme_mmio_shutdown_set();
+ nvme_clear_ctrl(n);
+ n->bar.cc = data;
+ n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
} else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
- n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
- n->bar.cc = data;
+ trace_nvme_mmio_shutdown_cleared();
+ n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
+ n->bar.cc = data;
+ }
+ break;
+ case 0x1C: /* CSTS */
+ if (data & (1 << 4)) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported,
+ "attempted to W1C CSTS.NSSRO"
+ " but CAP.NSSRS is zero (not supported)");
+ } else if (data != 0) {
+ NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts,
+ "attempted to set a read only bit"
+ " of controller status");
+ }
+ break;
+ case 0x20: /* NSSR */
+ if (data == 0x4E564D65) {
+ trace_nvme_ub_mmiowr_ssreset_unsupported();
+ } else {
+ /* The spec says that writes of other values have no effect */
+ return;
}
break;
- case 0x24:
+ case 0x24: /* AQA */
n->bar.aqa = data & 0xffffffff;
+ trace_nvme_mmio_aqattr(data & 0xffffffff);
break;
- case 0x28:
+ case 0x28: /* ASQ */
n->bar.asq = data;
+ trace_nvme_mmio_asqaddr(data);
break;
- case 0x2c:
+ case 0x2c: /* ASQ hi */
n->bar.asq |= data << 32;
+ trace_nvme_mmio_asqaddr_hi(data, n->bar.asq);
break;
- case 0x30:
+ case 0x30: /* ACQ */
+ trace_nvme_mmio_acqaddr(data);
n->bar.acq = data;
break;
- case 0x34:
+ case 0x34: /* ACQ hi */
n->bar.acq |= data << 32;
+ trace_nvme_mmio_acqaddr_hi(data, n->bar.acq);
break;
+ case 0x38: /* CMBLOC */
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved,
+ "invalid write to reserved CMBLOC"
+ " when CMBSZ is zero, ignored");
+ return;
+ case 0x3C: /* CMBSZ */
+ NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
+ "invalid write to read only CMBSZ, ignored");
+ return;
default:
+ NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
+ "invalid MMIO write,"
+ " offset=0x%"PRIx64", data=%"PRIx64"",
+ offset, data);
break;
}
}
@@ -812,9 +1015,26 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
uint8_t *ptr = (uint8_t *)&n->bar;
uint64_t val = 0;
+ if (unlikely(addr & (sizeof(uint32_t) - 1))) {
+ NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32,
+ "MMIO read not 32-bit aligned,"
+ " offset=0x%"PRIx64"", addr);
+ /* should RAZ, fall through for now */
+ } else if (unlikely(size < sizeof(uint32_t))) {
+ NVME_GUEST_ERR(nvme_ub_mmiord_toosmall,
+ "MMIO read smaller than 32-bits,"
+ " offset=0x%"PRIx64"", addr);
+ /* should RAZ, fall through for now */
+ }
+
if (addr < sizeof(n->bar)) {
memcpy(&val, ptr + addr, size);
+ } else {
+ NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
+ "MMIO read beyond last register,"
+ " offset=0x%"PRIx64", returning 0", addr);
}
+
return val;
}
@@ -822,22 +1042,36 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
{
uint32_t qid;
- if (addr & ((1 << 2) - 1)) {
+ if (unlikely(addr & ((1 << 2) - 1))) {
+ NVME_GUEST_ERR(nvme_ub_db_wr_misaligned,
+ "doorbell write not 32-bit aligned,"
+ " offset=0x%"PRIx64", ignoring", addr);
return;
}
if (((addr - 0x1000) >> 2) & 1) {
+ /* Completion queue doorbell write */
+
uint16_t new_head = val & 0xffff;
int start_sqs;
NvmeCQueue *cq;
qid = (addr - (0x1000 + (1 << 2))) >> 3;
- if (nvme_check_cqid(n, qid)) {
+ if (unlikely(nvme_check_cqid(n, qid))) {
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq,
+ "completion queue doorbell write"
+ " for nonexistent queue,"
+ " sqid=%"PRIu32", ignoring", qid);
return;
}
cq = n->cq[qid];
- if (new_head >= cq->size) {
+ if (unlikely(new_head >= cq->size)) {
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead,
+ "completion queue doorbell write value"
+ " beyond queue size, sqid=%"PRIu32","
+ " new_head=%"PRIu16", ignoring",
+ qid, new_head);
return;
}
@@ -855,16 +1089,27 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
nvme_isr_notify(n, cq);
}
} else {
+ /* Submission queue doorbell write */
+
uint16_t new_tail = val & 0xffff;
NvmeSQueue *sq;
qid = (addr - 0x1000) >> 3;
- if (nvme_check_sqid(n, qid)) {
+ if (unlikely(nvme_check_sqid(n, qid))) {
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq,
+ "submission queue doorbell write"
+ " for nonexistent queue,"
+ " sqid=%"PRIu32", ignoring", qid);
return;
}
sq = n->sq[qid];
- if (new_tail >= sq->size) {
+ if (unlikely(new_tail >= sq->size)) {
+ NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail,
+ "submission queue doorbell write value"
+ " beyond queue size, sqid=%"PRIu32","
+ " new_tail=%"PRIu16", ignoring",
+ qid, new_tail);
return;
}
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 962a3bfa24..5acd495207 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -11,6 +11,99 @@ virtio_blk_submit_multireq(void *vdev, void *mrb, int start, int num_reqs, uint6
hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p LCHS %d %d %d"
hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int trans) "blk %p CHS %u %u %u trans %d"
+# hw/block/nvme.c
+# nvme traces for successful events
+nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
+nvme_irq_pin(void) "pulsing IRQ pin"
+nvme_irq_masked(void) "IRQ is masked"
+nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+nvme_rw(char const *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
+nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
+nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
+nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
+nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
+nvme_identify_ctrl(void) "identify controller"
+nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
+nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_getfeat_vwcache(char const* result) "get feature volatile write cache, result=%s"
+nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
+nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
+nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
+nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64""
+nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64""
+nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64""
+nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64""
+nvme_mmio_start_success(void) "setting controller enable bit succeeded"
+nvme_mmio_stopped(void) "cleared controller enable bit"
+nvme_mmio_shutdown_set(void) "shutdown bit set"
+nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
+
+# nvme traces for error conditions
+nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
+nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
+nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
+nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
+nvme_err_invalid_field(void) "invalid field"
+nvme_err_invalid_prp(void) "invalid PRP"
+nvme_err_invalid_sgl(void) "invalid SGL"
+nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
+nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
+nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
+nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
+nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
+nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
+nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16""
+nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16""
+nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64""
+nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16""
+nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16""
+nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16""
+nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16""
+nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16""
+nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64""
+nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16""
+nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16""
+nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
+nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
+nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
+nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
+nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
+nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null"
+nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64""
+nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64""
+nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u"
+nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u"
+nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u"
+nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u"
+nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u"
+nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u"
+nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
+nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
+nvme_err_startfail(void) "setting controller enable bit failed"
+
+# Traces for undefined behavior
+nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
+nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u"
+nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled"
+nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status"
+nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)"
+nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
+nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
+nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
+nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
+nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
+nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
+nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0"
+nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring"
+nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring"
+nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring"
+nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring"
+nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring"
+
# hw/block/xen_disk.c
xen_disk_alloc(char *name) "%s"
xen_disk_init(char *name) "%s"
diff --git a/include/block/block.h b/include/block/block.h
index c05cac57e5..9b12774ddf 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -585,7 +585,7 @@ void bdrv_io_unplug(BlockDriverState *bs);
* Begin a quiesced section of all users of @bs. This is part of
* bdrv_drained_begin.
*/
-void bdrv_parent_drained_begin(BlockDriverState *bs);
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
/**
* bdrv_parent_drained_end:
@@ -593,7 +593,7 @@ void bdrv_parent_drained_begin(BlockDriverState *bs);
* End a quiesced section of all users of @bs. This is part of
* bdrv_drained_end.
*/
-void bdrv_parent_drained_end(BlockDriverState *bs);
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
/**
* bdrv_drained_begin:
@@ -608,12 +608,23 @@ void bdrv_parent_drained_end(BlockDriverState *bs);
void bdrv_drained_begin(BlockDriverState *bs);
/**
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
+ * exclusive access to all child nodes as well.
+ */
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
+
+/**
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
*/
void bdrv_drained_end(BlockDriverState *bs);
+/**
+ * End a quiescent section started by bdrv_subtree_drained_begin().
+ */
+void bdrv_subtree_drained_end(BlockDriverState *bs);
+
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
Error **errp);
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a5482775ec..29cafa4236 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -717,6 +717,8 @@ struct BlockDriverState {
/* Accessed with atomic ops. */
int quiesce_counter;
+ int recursive_quiesce_counter;
+
unsigned int write_gen; /* Current data generation */
/* Protected by reqs_lock. */
@@ -768,6 +770,9 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+
int get_tmp_filename(char *filename, int size);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
const char *filename);
@@ -1045,7 +1050,6 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-bool bdrv_requests_pending(BlockDriverState *bs);
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index a8cdbc300b..e94a6881b2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3918,6 +3918,10 @@
# does not support all kinds of operations, all kinds of children, nor
# all block drivers.
#
+# FIXME Removing children from a quorum node means introducing gaps in the
+# child indices. This cannot be represented in the 'children' list of
+# BlockdevOptionsQuorum, as returned by .bdrv_refresh_filename().
+#
# Warning: The data in a new quorum child MUST be consistent with that of
# the rest of the array.
#
diff --git a/qemu-doc.texi b/qemu-doc.texi
index ae90f7199e..8d0c809ad5 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -2589,12 +2589,6 @@ deprecated.
@section System emulator command line arguments
-@subsection -drive boot=on|off (since 1.3.0)
-
-The ``boot=on|off'' option to the ``-drive'' argument is
-ignored. Applications should use the ``bootindex=N'' parameter
-to set an absolute ordering between devices instead.
-
@subsection -tdf (since 1.3.0)
The ``-tdf'' argument is ignored. The behaviour implemented
@@ -2690,20 +2684,27 @@ longer be directly supported in QEMU.
The ``-drive if=scsi'' argument is replaced by the the
``-device BUS-TYPE'' argument combined with ``-drive if=none''.
+@subsection -drive cyls=...,heads=...,secs=...,trans=... (since 2.10.0)
+
+The drive geometry arguments are replaced by the the geometry arguments
+that can be specified with the ``-device'' parameter.
+
+@subsection -drive serial=... (since 2.10.0)
+
+The drive serial argument is replaced by the the serial argument
+that can be specified with the ``-device'' parameter.
+
+@subsection -drive addr=... (since 2.10.0)
+
+The drive addr argument is replaced by the the addr argument
+that can be specified with the ``-device'' parameter.
+
@subsection -net dump (since 2.10.0)
The ``--net dump'' argument is now replaced with the
``-object filter-dump'' argument which works in combination
with the modern ``-netdev`` backends instead.
-@subsection -hdachs (since 2.10.0)
-
-The ``-hdachs'' argument is now a synonym for setting
-the ``cyls'', ``heads'', ``secs'', and ``trans'' properties
-on the ``ide-hd'' device using the ``-device'' argument.
-The new syntax allows different settings to be provided
-per disk.
-
@subsection -usbdevice (since 2.10.0)
The ``-usbdevice DEV'' argument is now a synonym for setting
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index de8e3de726..a6a70fc3dc 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -2013,8 +2013,11 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv)
opts = qopts ? qemu_opts_to_qdict(qopts, NULL) : NULL;
qemu_opts_reset(&reopen_opts);
+ bdrv_subtree_drained_begin(bs);
brq = bdrv_reopen_queue(NULL, bs, opts, flags);
bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err);
+ bdrv_subtree_drained_end(bs);
+
if (local_err) {
error_report_err(local_err);
} else {
diff --git a/qemu-options.hx b/qemu-options.hx
index 6d56f4bdb3..678181c599 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -846,8 +846,8 @@ of available connectors of a given interface type.
@item media=@var{media}
This option defines the type of the media: disk or cdrom.
@item cyls=@var{c},heads=@var{h},secs=@var{s}[,trans=@var{t}]
-These options have the same definition as they have in @option{-hdachs}.
-These parameters are deprecated, use the corresponding parameters
+Force disk physical geometry and the optional BIOS translation (trans=none or
+lba). These parameters are deprecated, use the corresponding parameters
of @code{-device} instead.
@item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and controls snapshot mode for the given drive
@@ -1027,21 +1027,6 @@ the raw disk image you use is not written back. You can however force
the write back by pressing @key{C-a s} (@pxref{disk_images}).
ETEXI
-DEF("hdachs", HAS_ARG, QEMU_OPTION_hdachs, \
- "-hdachs c,h,s[,t]\n" \
- " force hard disk 0 physical geometry and the optional BIOS\n" \
- " translation (t=none or lba) (usually QEMU can guess them)\n",
- QEMU_ARCH_ALL)
-STEXI
-@item -hdachs @var{c},@var{h},@var{s},[,@var{t}]
-@findex -hdachs
-Force hard disk 0 physical geometry (1 <= @var{c} <= 16383, 1 <=
-@var{h} <= 16, 1 <= @var{s} <= 63) and optionally force the BIOS
-translation mode (@var{t}=none, lba or auto). Usually QEMU can guess
-all those parameters. This option is deprecated, please use
-@code{-device ide-hd,cyls=c,heads=h,secs=s,...} instead.
-ETEXI
-
DEF("fsdev", HAS_ARG, QEMU_OPTION_fsdev,
"-fsdev fsdriver,id=id[,path=path,][security_model={mapped-xattr|mapped-file|passthrough|none}]\n"
" [,writeout=immediate][,readonly][,socket=socket|sock_fd=sock_fd][,fmode=fmode][,dmode=dmode]\n"
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 77f8183117..39a4b5359d 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -80,6 +80,7 @@ gcov-files-test-thread-pool-y = thread-pool.c
gcov-files-test-hbitmap-y = util/hbitmap.c
check-unit-y += tests/test-hbitmap$(EXESUF)
gcov-files-test-hbitmap-y = blockjob.c
+check-unit-y += tests/test-bdrv-drain$(EXESUF)
check-unit-y += tests/test-blockjob$(EXESUF)
check-unit-y += tests/test-blockjob-txn$(EXESUF)
check-unit-y += tests/test-x86-cpuid$(EXESUF)
@@ -595,6 +596,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y)
tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
tests/test-aio-multithread$(EXESUF): tests/test-aio-multithread.o $(test-block-obj-y)
tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
+tests/test-bdrv-drain$(EXESUF): tests/test-bdrv-drain.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y)
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
index 887eb4f496..5e869fe2b7 100755
--- a/tests/qemu-iotests/197
+++ b/tests/qemu-iotests/197
@@ -60,6 +60,10 @@ echo '=== Copy-on-read ==='
echo
# Prep the images
+# VPC rounds image sizes to a specific geometry, force a specific size.
+if [ "$IMGFMT" = "vpc" ]; then
+ IMGOPTS=$(_optstr_add "$IMGOPTS" "force_size")
+fi
_make_test_img 4G
$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index d9237799e9..f08248bfd9 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -134,7 +134,8 @@ _filter_img_create()
-e "s# log_size=[0-9]\\+##g" \
-e "s# refcount_bits=[0-9]\\+##g" \
-e "s# key-secret=[a-zA-Z0-9]\\+##g" \
- -e "s# iter-time=[0-9]\\+##g"
+ -e "s# iter-time=[0-9]\\+##g" \
+ -e "s# force_size=\\(on\\|off\\)##g"
}
_filter_img_info()
diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c
new file mode 100644
index 0000000000..d760e2b243
--- /dev/null
+++ b/tests/test-bdrv-drain.c
@@ -0,0 +1,651 @@
+/*
+ * Block node draining tests
+ *
+ * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/blockjob_int.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+typedef struct BDRVTestState {
+ int drain_count;
+} BDRVTestState;
+
+static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+{
+ BDRVTestState *s = bs->opaque;
+ s->drain_count++;
+}
+
+static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+{
+ BDRVTestState *s = bs->opaque;
+ s->drain_count--;
+}
+
+static void bdrv_test_close(BlockDriverState *bs)
+{
+ BDRVTestState *s = bs->opaque;
+ g_assert_cmpint(s->drain_count, >, 0);
+}
+
+static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ /* We want this request to stay until the polling loop in drain waits for
+ * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
+ * first and polls its result, too, but it shouldn't accidentally complete
+ * this request yet. */
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+
+ return 0;
+}
+
+static BlockDriver bdrv_test = {
+ .format_name = "test",
+ .instance_size = sizeof(BDRVTestState),
+
+ .bdrv_close = bdrv_test_close,
+ .bdrv_co_preadv = bdrv_test_co_preadv,
+
+ .bdrv_co_drain_begin = bdrv_test_co_drain_begin,
+ .bdrv_co_drain_end = bdrv_test_co_drain_end,
+
+ .bdrv_child_perm = bdrv_format_default_perms,
+};
+
+static void aio_ret_cb(void *opaque, int ret)
+{
+ int *aio_ret = opaque;
+ *aio_ret = ret;
+}
+
+typedef struct CallInCoroutineData {
+ void (*entry)(void);
+ bool done;
+} CallInCoroutineData;
+
+static coroutine_fn void call_in_coroutine_entry(void *opaque)
+{
+ CallInCoroutineData *data = opaque;
+
+ data->entry();
+ data->done = true;
+}
+
+static void call_in_coroutine(void (*entry)(void))
+{
+ Coroutine *co;
+ CallInCoroutineData data = {
+ .entry = entry,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(call_in_coroutine_entry, &data);
+ qemu_coroutine_enter(co);
+ while (!data.done) {
+ aio_poll(qemu_get_aio_context(), true);
+ }
+}
+
+enum drain_type {
+ BDRV_DRAIN_ALL,
+ BDRV_DRAIN,
+ BDRV_SUBTREE_DRAIN,
+ DRAIN_TYPE_MAX,
+};
+
+static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
+{
+ switch (drain_type) {
+ case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break;
+ case BDRV_DRAIN: bdrv_drained_begin(bs); break;
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break;
+ default: g_assert_not_reached();
+ }
+}
+
+static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
+{
+ switch (drain_type) {
+ case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break;
+ case BDRV_DRAIN: bdrv_drained_end(bs); break;
+ case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break;
+ default: g_assert_not_reached();
+ }
+}
+
+static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
+{
+ BlockBackend *blk;
+ BlockDriverState *bs, *backing;
+ BDRVTestState *s, *backing_s;
+ BlockAIOCB *acb;
+ int aio_ret;
+
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = NULL,
+ .iov_len = 0,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+ &error_abort);
+ s = bs->opaque;
+ blk_insert_bs(blk, bs, &error_abort);
+
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+ backing_s = backing->opaque;
+ bdrv_set_backing_hd(bs, backing, &error_abort);
+
+ /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ do_drain_begin(drain_type, bs);
+
+ g_assert_cmpint(s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+
+ do_drain_end(drain_type, bs);
+
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ /* Now do the same while a request is pending */
+ aio_ret = -EINPROGRESS;
+ acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
+ g_assert(acb != NULL);
+ g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
+
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ do_drain_begin(drain_type, bs);
+
+ g_assert_cmpint(aio_ret, ==, 0);
+ g_assert_cmpint(s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
+
+ do_drain_end(drain_type, bs);
+
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ bdrv_unref(backing);
+ bdrv_unref(bs);
+ blk_unref(blk);
+}
+
+static void test_drv_cb_drain_all(void)
+{
+ test_drv_cb_common(BDRV_DRAIN_ALL, true);
+}
+
+static void test_drv_cb_drain(void)
+{
+ test_drv_cb_common(BDRV_DRAIN, false);
+}
+
+static void test_drv_cb_drain_subtree(void)
+{
+ test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
+}
+
+static void test_drv_cb_co_drain(void)
+{
+ call_in_coroutine(test_drv_cb_drain);
+}
+
+static void test_drv_cb_co_drain_subtree(void)
+{
+ call_in_coroutine(test_drv_cb_drain_subtree);
+}
+
+static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+{
+ BlockBackend *blk;
+ BlockDriverState *bs, *backing;
+
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+ &error_abort);
+ blk_insert_bs(blk, bs, &error_abort);
+
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+ bdrv_set_backing_hd(bs, backing, &error_abort);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+
+ do_drain_begin(drain_type, bs);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, 1);
+ g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
+
+ do_drain_end(drain_type, bs);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+
+ bdrv_unref(backing);
+ bdrv_unref(bs);
+ blk_unref(blk);
+}
+
+static void test_quiesce_drain_all(void)
+{
+ // XXX drain_all doesn't quiesce
+ //test_quiesce_common(BDRV_DRAIN_ALL, true);
+}
+
+static void test_quiesce_drain(void)
+{
+ test_quiesce_common(BDRV_DRAIN, false);
+}
+
+static void test_quiesce_drain_subtree(void)
+{
+ test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
+}
+
+static void test_quiesce_co_drain(void)
+{
+ call_in_coroutine(test_quiesce_drain);
+}
+
+static void test_quiesce_co_drain_subtree(void)
+{
+ call_in_coroutine(test_quiesce_drain_subtree);
+}
+
+static void test_nested(void)
+{
+ BlockBackend *blk;
+ BlockDriverState *bs, *backing;
+ BDRVTestState *s, *backing_s;
+ enum drain_type outer, inner;
+
+ blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
+ &error_abort);
+ s = bs->opaque;
+ blk_insert_bs(blk, bs, &error_abort);
+
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+ backing_s = backing->opaque;
+ bdrv_set_backing_hd(bs, backing, &error_abort);
+
+ for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
+ for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
+ /* XXX bdrv_drain_all() doesn't increase the quiesce_counter */
+ int bs_quiesce = (outer != BDRV_DRAIN_ALL) +
+ (inner != BDRV_DRAIN_ALL);
+ int backing_quiesce = (outer == BDRV_SUBTREE_DRAIN) +
+ (inner == BDRV_SUBTREE_DRAIN);
+ int backing_cb_cnt = (outer != BDRV_DRAIN) +
+ (inner != BDRV_DRAIN);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ do_drain_begin(outer, bs);
+ do_drain_begin(inner, bs);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, bs_quiesce);
+ g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
+ g_assert_cmpint(s->drain_count, ==, 2);
+ g_assert_cmpint(backing_s->drain_count, ==, backing_cb_cnt);
+
+ do_drain_end(inner, bs);
+ do_drain_end(outer, bs);
+
+ g_assert_cmpint(bs->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+ }
+ }
+
+ bdrv_unref(backing);
+ bdrv_unref(bs);
+ blk_unref(blk);
+}
+
+static void test_multiparent(void)
+{
+ BlockBackend *blk_a, *blk_b;
+ BlockDriverState *bs_a, *bs_b, *backing;
+ BDRVTestState *a_s, *b_s, *backing_s;
+
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+ &error_abort);
+ a_s = bs_a->opaque;
+ blk_insert_bs(blk_a, bs_a, &error_abort);
+
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+ &error_abort);
+ b_s = bs_b->opaque;
+ blk_insert_bs(blk_b, bs_b, &error_abort);
+
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+ backing_s = backing->opaque;
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(a_s->drain_count, ==, 0);
+ g_assert_cmpint(b_s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
+ g_assert_cmpint(a_s->drain_count, ==, 1);
+ g_assert_cmpint(b_s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
+
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+ g_assert_cmpint(backing->quiesce_counter, ==, 2);
+ g_assert_cmpint(a_s->drain_count, ==, 2);
+ g_assert_cmpint(b_s->drain_count, ==, 2);
+ g_assert_cmpint(backing_s->drain_count, ==, 2);
+
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+ g_assert_cmpint(backing->quiesce_counter, ==, 1);
+ g_assert_cmpint(a_s->drain_count, ==, 1);
+ g_assert_cmpint(b_s->drain_count, ==, 1);
+ g_assert_cmpint(backing_s->drain_count, ==, 1);
+
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(a_s->drain_count, ==, 0);
+ g_assert_cmpint(b_s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ bdrv_unref(backing);
+ bdrv_unref(bs_a);
+ bdrv_unref(bs_b);
+ blk_unref(blk_a);
+ blk_unref(blk_b);
+}
+
+static void test_graph_change(void)
+{
+ BlockBackend *blk_a, *blk_b;
+ BlockDriverState *bs_a, *bs_b, *backing;
+ BDRVTestState *a_s, *b_s, *backing_s;
+
+ blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+ &error_abort);
+ a_s = bs_a->opaque;
+ blk_insert_bs(blk_a, bs_a, &error_abort);
+
+ blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+ &error_abort);
+ b_s = bs_b->opaque;
+ blk_insert_bs(blk_b, bs_b, &error_abort);
+
+ backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+ backing_s = backing->opaque;
+ bdrv_set_backing_hd(bs_a, backing, &error_abort);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(a_s->drain_count, ==, 0);
+ g_assert_cmpint(b_s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+ do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
+ g_assert_cmpint(a_s->drain_count, ==, 5);
+ g_assert_cmpint(b_s->drain_count, ==, 5);
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
+
+ bdrv_set_backing_hd(bs_b, NULL, &error_abort);
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+ g_assert_cmpint(backing->quiesce_counter, ==, 3);
+ g_assert_cmpint(a_s->drain_count, ==, 3);
+ g_assert_cmpint(b_s->drain_count, ==, 2);
+ g_assert_cmpint(backing_s->drain_count, ==, 3);
+
+ bdrv_set_backing_hd(bs_b, backing, &error_abort);
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+ g_assert_cmpint(backing->quiesce_counter, ==, 5);
+ g_assert_cmpint(a_s->drain_count, ==, 5);
+ g_assert_cmpint(b_s->drain_count, ==, 5);
+ g_assert_cmpint(backing_s->drain_count, ==, 5);
+
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+ do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+
+ g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+ g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+ g_assert_cmpint(backing->quiesce_counter, ==, 0);
+ g_assert_cmpint(a_s->drain_count, ==, 0);
+ g_assert_cmpint(b_s->drain_count, ==, 0);
+ g_assert_cmpint(backing_s->drain_count, ==, 0);
+
+ bdrv_unref(backing);
+ bdrv_unref(bs_a);
+ bdrv_unref(bs_b);
+ blk_unref(blk_a);
+ blk_unref(blk_b);
+}
+
+
+typedef struct TestBlockJob {
+ BlockJob common;
+ bool should_complete;
+} TestBlockJob;
+
+static void test_job_completed(BlockJob *job, void *opaque)
+{
+ block_job_completed(job, 0);
+}
+
+static void coroutine_fn test_job_start(void *opaque)
+{
+ TestBlockJob *s = opaque;
+
+ while (!s->should_complete) {
+ block_job_sleep_ns(&s->common, 100000);
+ }
+
+ block_job_defer_to_main_loop(&s->common, test_job_completed, NULL);
+}
+
+static void test_job_complete(BlockJob *job, Error **errp)
+{
+ TestBlockJob *s = container_of(job, TestBlockJob, common);
+ s->should_complete = true;
+}
+
+BlockJobDriver test_job_driver = {
+ .instance_size = sizeof(TestBlockJob),
+ .start = test_job_start,
+ .complete = test_job_complete,
+};
+
+static void test_blockjob_common(enum drain_type drain_type)
+{
+ BlockBackend *blk_src, *blk_target;
+ BlockDriverState *src, *target;
+ BlockJob *job;
+ int ret;
+
+ src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
+ &error_abort);
+ blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ blk_insert_bs(blk_src, src, &error_abort);
+
+ target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
+ &error_abort);
+ blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
+ blk_insert_bs(blk_target, target, &error_abort);
+
+ job = block_job_create("job0", &test_job_driver, src, 0, BLK_PERM_ALL, 0,
+ 0, NULL, NULL, &error_abort);
+ block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
+ block_job_start(job);
+
+ g_assert_cmpint(job->pause_count, ==, 0);
+ g_assert_false(job->paused);
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+ do_drain_begin(drain_type, src);
+
+ if (drain_type == BDRV_DRAIN_ALL) {
+ /* bdrv_drain_all() drains both src and target */
+ g_assert_cmpint(job->pause_count, ==, 2);
+ } else {
+ g_assert_cmpint(job->pause_count, ==, 1);
+ }
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
+ /* g_assert_true(job->paused); */
+ g_assert_false(job->busy); /* The job is paused */
+
+ do_drain_end(drain_type, src);
+
+ g_assert_cmpint(job->pause_count, ==, 0);
+ g_assert_false(job->paused);
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+ do_drain_begin(drain_type, target);
+
+ if (drain_type == BDRV_DRAIN_ALL) {
+ /* bdrv_drain_all() drains both src and target */
+ g_assert_cmpint(job->pause_count, ==, 2);
+ } else {
+ g_assert_cmpint(job->pause_count, ==, 1);
+ }
+ /* XXX We don't wait until the job is actually paused. Is this okay? */
+ /* g_assert_true(job->paused); */
+ g_assert_false(job->busy); /* The job is paused */
+
+ do_drain_end(drain_type, target);
+
+ g_assert_cmpint(job->pause_count, ==, 0);
+ g_assert_false(job->paused);
+ g_assert_false(job->busy); /* We're in block_job_sleep_ns() */
+
+ ret = block_job_complete_sync(job, &error_abort);
+ g_assert_cmpint(ret, ==, 0);
+
+ blk_unref(blk_src);
+ blk_unref(blk_target);
+ bdrv_unref(src);
+ bdrv_unref(target);
+}
+
+static void test_blockjob_drain_all(void)
+{
+ test_blockjob_common(BDRV_DRAIN_ALL);
+}
+
+static void test_blockjob_drain(void)
+{
+ test_blockjob_common(BDRV_DRAIN);
+}
+
+static void test_blockjob_drain_subtree(void)
+{
+ test_blockjob_common(BDRV_SUBTREE_DRAIN);
+}
+
+int main(int argc, char **argv)
+{
+ bdrv_init();
+ qemu_init_main_loop(&error_abort);
+
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
+ g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
+ g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
+ test_drv_cb_drain_subtree);
+
+ // XXX bdrv_drain_all() doesn't work in coroutine context
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
+ g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
+ test_drv_cb_co_drain_subtree);
+
+
+ g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
+ g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
+ g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
+ test_quiesce_drain_subtree);
+
+ // XXX bdrv_drain_all() doesn't work in coroutine context
+ g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
+ g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
+ test_quiesce_co_drain_subtree);
+
+ g_test_add_func("/bdrv-drain/nested", test_nested);
+ g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
+ g_test_add_func("/bdrv-drain/graph-change", test_graph_change);
+
+ g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
+ g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
+ g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
+ test_blockjob_drain_subtree);
+
+ return g_test_run();
+}
diff --git a/vl.c b/vl.c
index d3a5c5d021..444b7507da 100644
--- a/vl.c
+++ b/vl.c
@@ -3052,9 +3052,8 @@ int main(int argc, char **argv, char **envp)
const char *boot_order = NULL;
const char *boot_once = NULL;
DisplayState *ds;
- int cyls, heads, secs, translation;
QemuOpts *opts, *machine_opts;
- QemuOpts *hda_opts = NULL, *icount_opts = NULL, *accel_opts = NULL;
+ QemuOpts *icount_opts = NULL, *accel_opts = NULL;
QemuOptsList *olist;
int optind;
const char *optarg;
@@ -3146,8 +3145,6 @@ int main(int argc, char **argv, char **envp)
cpu_model = NULL;
snapshot = 0;
- cyls = heads = secs = 0;
- translation = BIOS_ATA_TRANSLATION_AUTO;
nb_nics = 0;
@@ -3186,7 +3183,7 @@ int main(int argc, char **argv, char **envp)
if (optind >= argc)
break;
if (argv[optind][0] != '-') {
- hda_opts = drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
+ drive_add(IF_DEFAULT, 0, argv[optind++], HD_OPTS);
} else {
const QEMUOption *popt;
@@ -3206,21 +3203,6 @@ int main(int argc, char **argv, char **envp)
cpu_model = optarg;
break;
case QEMU_OPTION_hda:
- {
- char buf[256];
- if (cyls == 0)
- snprintf(buf, sizeof(buf), "%s", HD_OPTS);
- else
- snprintf(buf, sizeof(buf),
- "%s,cyls=%d,heads=%d,secs=%d%s",
- HD_OPTS , cyls, heads, secs,
- translation == BIOS_ATA_TRANSLATION_LBA ?
- ",trans=lba" :
- translation == BIOS_ATA_TRANSLATION_NONE ?
- ",trans=none" : "");
- drive_add(IF_DEFAULT, 0, optarg, buf);
- break;
- }
case QEMU_OPTION_hdb:
case QEMU_OPTION_hdc:
case QEMU_OPTION_hdd:
@@ -3271,70 +3253,6 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_snapshot:
snapshot = 1;
break;
- case QEMU_OPTION_hdachs:
- {
- const char *p;
- p = optarg;
- cyls = strtol(p, (char **)&p, 0);
- if (cyls < 1 || cyls > 16383)
- goto chs_fail;
- if (*p != ',')
- goto chs_fail;
- p++;
- heads = strtol(p, (char **)&p, 0);
- if (heads < 1 || heads > 16)
- goto chs_fail;
- if (*p != ',')
- goto chs_fail;
- p++;
- secs = strtol(p, (char **)&p, 0);
- if (secs < 1 || secs > 63)
- goto chs_fail;
- if (*p == ',') {
- p++;
- if (!strcmp(p, "large")) {
- translation = BIOS_ATA_TRANSLATION_LARGE;
- } else if (!strcmp(p, "rechs")) {
- translation = BIOS_ATA_TRANSLATION_RECHS;
- } else if (!strcmp(p, "none")) {
- translation = BIOS_ATA_TRANSLATION_NONE;
- } else if (!strcmp(p, "lba")) {
- translation = BIOS_ATA_TRANSLATION_LBA;
- } else if (!strcmp(p, "auto")) {
- translation = BIOS_ATA_TRANSLATION_AUTO;
- } else {
- goto chs_fail;
- }
- } else if (*p != '\0') {
- chs_fail:
- error_report("invalid physical CHS format");
- exit(1);
- }
- if (hda_opts != NULL) {
- qemu_opt_set_number(hda_opts, "cyls", cyls,
- &error_abort);
- qemu_opt_set_number(hda_opts, "heads", heads,
- &error_abort);
- qemu_opt_set_number(hda_opts, "secs", secs,
- &error_abort);
- if (translation == BIOS_ATA_TRANSLATION_LARGE) {
- qemu_opt_set(hda_opts, "trans", "large",
- &error_abort);
- } else if (translation == BIOS_ATA_TRANSLATION_RECHS) {
- qemu_opt_set(hda_opts, "trans", "rechs",
- &error_abort);
- } else if (translation == BIOS_ATA_TRANSLATION_LBA) {
- qemu_opt_set(hda_opts, "trans", "lba",
- &error_abort);
- } else if (translation == BIOS_ATA_TRANSLATION_NONE) {
- qemu_opt_set(hda_opts, "trans", "none",
- &error_abort);
- }
- }
- }
- error_report("'-hdachs' is deprecated, please use '-device"
- " ide-hd,cyls=c,heads=h,secs=s,...' instead");
- break;
case QEMU_OPTION_numa:
opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
optarg, true);