about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- block.c 34
-rw-r--r-- block/io.c 60
-rw-r--r-- include/block/block.h 1
-rw-r--r-- include/block/block_int.h 1
4 files changed, 79 insertions, 17 deletions
diff --git a/block.c b/block.c
index c8586f41ba..6c128007fd 100644
--- a/block.c
+++ b/block.c
@@ -333,6 +333,10 @@ BlockDriverState *bdrv_new(void)
qemu_co_queue_init(&bs->flush_queue);
+ for (i = 0; i < bdrv_drain_all_count; i++) {
+ bdrv_drained_begin(bs);
+ }
+
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
@@ -1164,7 +1168,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
int open_flags, Error **errp)
{
Error *local_err = NULL;
- int ret;
+ int i, ret;
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
@@ -1212,6 +1216,12 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
+ for (i = 0; i < bs->quiesce_counter; i++) {
+ if (drv->bdrv_co_drain_begin) {
+ drv->bdrv_co_drain_begin(bs);
+ }
+ }
+
return 0;
open_failed:
bs->drv = NULL;
@@ -2033,7 +2043,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
child->role->detach(child);
}
if (old_bs->quiesce_counter && child->role->drained_end) {
- for (i = 0; i < old_bs->quiesce_counter; i++) {
+ int num = old_bs->quiesce_counter;
+ if (child->role->parent_is_bds) {
+ num -= bdrv_drain_all_count;
+ }
+ assert(num >= 0);
+ for (i = 0; i < num; i++) {
child->role->drained_end(child);
}
}
@@ -2045,7 +2060,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
if (new_bs) {
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
if (new_bs->quiesce_counter && child->role->drained_begin) {
- for (i = 0; i < new_bs->quiesce_counter; i++) {
+ int num = new_bs->quiesce_counter;
+ if (child->role->parent_is_bds) {
+ num -= bdrv_drain_all_count;
+ }
+ assert(num >= 0);
+ for (i = 0; i < num; i++) {
child->role->drained_begin(child);
}
}
@@ -4049,6 +4069,14 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
return QTAILQ_NEXT(bs, node_list);
}
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
+{ /* Cursor-style walk of all_bdrv_states: NULL yields the first entry, otherwise the entry after bs. */
+ if (!bs) { /* NULL cursor: start at the head of the list */
+ return QTAILQ_FIRST(&all_bdrv_states);
+ }
+ return QTAILQ_NEXT(bs, bs_list); /* bs_list is the link bdrv_new() uses to append to all_bdrv_states */
+}
+
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
return bs->node_name;
diff --git a/block/io.c b/block/io.c
index 1834a14aa6..ef4fedd364 100644
--- a/block/io.c
+++ b/block/io.c
@@ -38,6 +38,8 @@
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+static AioWait drain_all_aio_wait; /* waited on in bdrv_drain_all_begin(), kicked from bdrv_wakeup() */
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -472,6 +474,29 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
}
}
+unsigned int bdrv_drain_all_count = 0; /* ++ in bdrv_drain_all_begin(), -- in bdrv_drain_all_end() */
+/* Wait condition for bdrv_drain_all_begin(): the OR of bdrv_drain_poll() over every node in all_bdrv_states. */
+static bool bdrv_drain_all_poll(void)
+{
+ BlockDriverState *bs = NULL;
+ bool result = false;
+
+ /* Execute pending BHs first (may modify the graph) and check everything
+ * else only after the BHs have executed. */
+ while (aio_poll(qemu_get_aio_context(), false)); /* intentionally empty body: repeat until aio_poll() returns false */
+
+ /* bdrv_drain_poll() can't make changes to the graph and we are holding the
+ * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
+ while ((bs = bdrv_next_all_states(bs))) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+ result |= bdrv_drain_poll(bs, false, NULL, true); /* no early exit: every node is polled on each call */
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
+
/*
* Wait for pending requests to complete across all BlockDriverStates
*
@@ -486,45 +511,51 @@ static void bdrv_drain_assert_idle(BlockDriverState *bs)
*/
void bdrv_drain_all_begin(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL; /* cursor for bdrv_next_all_states(); NULL means "start at the first node" */
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true);
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
return;
}
- /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
- * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
- * nodes in several different AioContexts, so make sure we're in the main
- * context. */
+ /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
+ * loop AioContext, so make sure we're in the main context. */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ assert(bdrv_drain_all_count < INT_MAX); /* NOTE(review): presumably because block.c mixes this unsigned counter with int arithmetic; confirm */
+ bdrv_drain_all_count++; /* from here on bdrv_new() drains each newly created node once per open section */
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Quiesce all nodes, without polling in-flight requests yet. The graph
+ * cannot change during this loop. */
+ while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, true, NULL, false, true);
+ bdrv_do_drained_begin(bs, false, NULL, true, false);
aio_context_release(aio_context);
}
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Now poll the in-flight requests */
+ AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+
+ while ((bs = bdrv_next_all_states(bs))) { /* bs is NULL again after the loop above, so this restarts at the first node */
bdrv_drain_assert_idle(bs);
}
}
void bdrv_drain_all_end(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL; /* cursor for bdrv_next_all_states(); NULL means "start at the first node" */
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, true, NULL, false);
+ bdrv_do_drained_end(bs, false, NULL, true);
aio_context_release(aio_context);
}
+
+ assert(bdrv_drain_all_count > 0); /* catches an end without a matching begin before the unsigned counter would wrap */
+ bdrv_drain_all_count--; /* decremented only after every node has gone through bdrv_do_drained_end() */
}
void bdrv_drain_all(void)
@@ -647,6 +678,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
void bdrv_wakeup(BlockDriverState *bs)
{
aio_wait_kick(bdrv_get_aio_wait(bs));
+ aio_wait_kick(&drain_all_aio_wait); /* also kick the global AioWait that bdrv_drain_all_begin() waits on via AIO_WAIT_WHILE() */
}
void bdrv_dec_in_flight(BlockDriverState *bs)
diff --git a/include/block/block.h b/include/block/block.h
index 836746e4e1..b1d6fdb97a 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -421,6 +421,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
Error **errp);
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs); /* bs == NULL returns the first entry of all_bdrv_states */
typedef struct BdrvNextIterator {
enum {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1abfc26d76..7cd7eed83b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -854,6 +854,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
+extern unsigned int bdrv_drain_all_count; /* defined in block/io.c; changed by bdrv_drain_all_begin()/bdrv_drain_all_end() */
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);