aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2018-09-25 16:47:35 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-09-25 16:47:35 +0100
commitc5e4e49258e9b89cb34c085a419dd9f862935c48 (patch)
treeaa7547e348af2bf5070321c329e2dbe3daf6290a /block
parent0a736f7ab83df26dbe5421bb3d1f7892def05dee (diff)
parent9c76ff9c16be890e70fce30754b096ff9950d1ee (diff)
Merge remote-tracking branch 'remotes/xanclic/tags/pull-block-2018-09-25' into staging
Block layer patches: - Drain fixes - node-name parameters for block-commit - Refactor block jobs to use transactional callbacks for exiting # gpg: Signature made Tue 25 Sep 2018 16:12:44 BST # gpg: using RSA key F407DB0061D5CF40 # gpg: Good signature from "Max Reitz <mreitz@redhat.com>" # Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40 * remotes/xanclic/tags/pull-block-2018-09-25: (42 commits) test-bdrv-drain: Test draining job source child and parent block: Use a single global AioWait test-bdrv-drain: Fix outdated comments test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort job: Avoid deadlocks in job_completed_txn_abort() test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level() block: Remove aio_poll() in bdrv_drain_poll variants blockjob: Lie better in child_job_drained_poll() block-backend: Decrease in_flight only after callback block-backend: Fix potential double blk_delete() block-backend: Add .drained_poll callback block: Add missing locking in bdrv_co_drain_bh_cb() test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback job: Use AIO_WAIT_WHILE() in job_finish_sync() test-blockjob: Acquire AioContext around job_cancel_sync() test-bdrv-drain: Drain with block jobs in an I/O thread aio-wait: Increase num_waiters even in home thread blockjob: Wake up BDS when job becomes idle job: Fix missing locking due to mismerge job: Fix nested aio_poll() hanging in job_txn_apply ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r--block/block-backend.c31
-rw-r--r--block/commit.c97
-rw-r--r--block/io.c30
-rw-r--r--block/linux-aio.c2
-rw-r--r--block/mirror.c49
-rw-r--r--block/stream.c28
6 files changed, 148 insertions, 89 deletions
diff --git a/block/block-backend.c b/block/block-backend.c
index 14a1b7ac6a..7b1ec5071b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -88,7 +88,6 @@ struct BlockBackend {
* Accessed with atomic ops.
*/
unsigned int in_flight;
- AioWait wait;
};
typedef struct BlockBackendAIOCB {
@@ -121,6 +120,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
abort();
}
static void blk_root_drained_begin(BdrvChild *child);
+static bool blk_root_drained_poll(BdrvChild *child);
static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
@@ -294,6 +294,7 @@ static const BdrvChildRole child_root = {
.get_parent_desc = blk_root_get_parent_desc,
.drained_begin = blk_root_drained_begin,
+ .drained_poll = blk_root_drained_poll,
.drained_end = blk_root_drained_end,
.activate = blk_root_activate,
@@ -433,6 +434,7 @@ int blk_get_refcnt(BlockBackend *blk)
*/
void blk_ref(BlockBackend *blk)
{
+ assert(blk->refcnt > 0);
blk->refcnt++;
}
@@ -445,7 +447,13 @@ void blk_unref(BlockBackend *blk)
{
if (blk) {
assert(blk->refcnt > 0);
- if (!--blk->refcnt) {
+ if (blk->refcnt > 1) {
+ blk->refcnt--;
+ } else {
+ blk_drain(blk);
+ /* blk_drain() cannot resurrect blk, nobody held a reference */
+ assert(blk->refcnt == 1);
+ blk->refcnt = 0;
blk_delete(blk);
}
}
@@ -1289,7 +1297,7 @@ static void blk_inc_in_flight(BlockBackend *blk)
static void blk_dec_in_flight(BlockBackend *blk)
{
atomic_dec(&blk->in_flight);
- aio_wait_kick(&blk->wait);
+ aio_wait_kick();
}
static void error_callback_bh(void *opaque)
@@ -1330,8 +1338,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
- blk_dec_in_flight(acb->rwco.blk);
acb->common.cb(acb->common.opaque, acb->rwco.ret);
+ blk_dec_in_flight(acb->rwco.blk);
qemu_aio_unref(acb);
}
}
@@ -1590,9 +1598,8 @@ void blk_drain(BlockBackend *blk)
}
/* We may have -ENOMEDIUM completions in flight */
- AIO_WAIT_WHILE(&blk->wait,
- blk_get_aio_context(blk),
- atomic_mb_read(&blk->in_flight) > 0);
+ AIO_WAIT_WHILE(blk_get_aio_context(blk),
+ atomic_mb_read(&blk->in_flight) > 0);
if (bs) {
bdrv_drained_end(bs);
@@ -1611,8 +1618,7 @@ void blk_drain_all(void)
aio_context_acquire(ctx);
/* We may have -ENOMEDIUM completions in flight */
- AIO_WAIT_WHILE(&blk->wait, ctx,
- atomic_mb_read(&blk->in_flight) > 0);
+ AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0);
aio_context_release(ctx);
}
@@ -2189,6 +2195,13 @@ static void blk_root_drained_begin(BdrvChild *child)
}
}
+static bool blk_root_drained_poll(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+ assert(blk->quiesce_counter);
+ return !!blk->in_flight;
+}
+
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
diff --git a/block/commit.c b/block/commit.c
index da69165de3..a2da5740b0 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,6 +36,7 @@ typedef struct CommitBlockJob {
BlockDriverState *commit_top_bs;
BlockBackend *top;
BlockBackend *base;
+ BlockDriverState *base_bs;
BlockdevOnError on_error;
int base_flags;
char *backing_file_str;
@@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
return 0;
}
-static void commit_exit(Job *job)
+static int commit_prepare(Job *job)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
- BlockJob *bjob = &s->common;
- BlockDriverState *top = blk_bs(s->top);
- BlockDriverState *base = blk_bs(s->base);
- BlockDriverState *commit_top_bs = s->commit_top_bs;
- bool remove_commit_top_bs = false;
-
- /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
- bdrv_ref(top);
- bdrv_ref(commit_top_bs);
/* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
* the normal backing chain can be restored. */
blk_unref(s->base);
+ s->base = NULL;
+
+ /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
+ * identically. Further work is needed to disambiguate these cases. */
+ return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
+ s->backing_file_str);
+}
- if (!job_is_cancelled(job) && job->ret == 0) {
- /* success */
- job->ret = bdrv_drop_intermediate(s->commit_top_bs, base,
- s->backing_file_str);
- } else {
- /* XXX Can (or should) we somehow keep 'consistent read' blocked even
- * after the failed/cancelled commit job is gone? If we already wrote
- * something to base, the intermediate images aren't valid any more. */
- remove_commit_top_bs = true;
+static void commit_abort(Job *job)
+{
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+ BlockDriverState *top_bs = blk_bs(s->top);
+
+ /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
+ bdrv_ref(top_bs);
+ bdrv_ref(s->commit_top_bs);
+
+ if (s->base) {
+ blk_unref(s->base);
}
+ /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
+ * can succeed */
+ block_job_remove_all_bdrv(&s->common);
+
+ /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
+ * commit filter driver from the backing chain now. Do this as the final
+ * step so that the 'consistent read' permission can be granted.
+ *
+ * XXX Can (or should) we somehow keep 'consistent read' blocked even
+ * after the failed/cancelled commit job is gone? If we already wrote
+ * something to base, the intermediate images aren't valid any more. */
+ bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL,
+ &error_abort);
+ bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs),
+ &error_abort);
+
+ bdrv_unref(s->commit_top_bs);
+ bdrv_unref(top_bs);
+}
+
+static void commit_clean(Job *job)
+{
+ CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
+
/* restore base open flags here if appropriate (e.g., change the base back
* to r/o). These reopens do not need to be atomic, since we won't abort
* even on failure here */
- if (s->base_flags != bdrv_get_flags(base)) {
- bdrv_reopen(base, s->base_flags, NULL);
+ if (s->base_flags != bdrv_get_flags(s->base_bs)) {
+ bdrv_reopen(s->base_bs, s->base_flags, NULL);
}
+
g_free(s->backing_file_str);
blk_unref(s->top);
-
- /* If there is more than one reference to the job (e.g. if called from
- * job_finish_sync()), job_completed() won't free it and therefore the
- * blockers on the intermediate nodes remain. This would cause
- * bdrv_set_backing_hd() to fail. */
- block_job_remove_all_bdrv(bjob);
-
- /* If bdrv_drop_intermediate() didn't already do that, remove the commit
- * filter driver from the backing chain. Do this as the final step so that
- * the 'consistent read' permission can be granted. */
- if (remove_commit_top_bs) {
- bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL,
- &error_abort);
- bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs),
- &error_abort);
- }
-
- bdrv_unref(commit_top_bs);
- bdrv_unref(top);
}
static int coroutine_fn commit_run(Job *job, Error **errp)
@@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = commit_run,
- .exit = commit_exit,
+ .prepare = commit_prepare,
+ .abort = commit_abort,
+ .clean = commit_clean
},
};
@@ -249,7 +258,8 @@ static BlockDriver bdrv_commit_top = {
};
void commit_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, BlockDriverState *top, int64_t speed,
+ BlockDriverState *base, BlockDriverState *top,
+ int creation_flags, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
const char *filter_node_name, Error **errp)
{
@@ -267,7 +277,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
}
s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
- speed, JOB_DEFAULT, NULL, NULL, errp);
+ speed, creation_flags, NULL, NULL, errp);
if (!s) {
return;
}
@@ -344,6 +354,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
if (ret < 0) {
goto fail;
}
+ s->base_bs = base;
/* Required permissions are already taken with block_job_add_bdrv() */
s->top = blk_new(0, BLK_PERM_ALL);
diff --git a/block/io.c b/block/io.c
index 7100344c7b..bd9d688f8b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -38,8 +38,6 @@
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
-static AioWait drain_all_aio_wait;
-
static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
@@ -268,10 +266,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent)
{
- /* Execute pending BHs first and check everything else only after the BHs
- * have executed. */
- while (aio_poll(bs->aio_context, false));
-
return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}
@@ -288,6 +282,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
BlockDriverState *bs = data->bs;
if (bs) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+ AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
+
+ /*
+ * When the coroutine yielded, the lock for its home context was
+ * released, so we need to re-acquire it here. If it explicitly
+ * acquired a different context, the lock is still held and we don't
+ * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
+ */
+ if (ctx == co_ctx) {
+ aio_context_acquire(ctx);
+ }
bdrv_dec_in_flight(bs);
if (data->begin) {
bdrv_do_drained_begin(bs, data->recursive, data->parent,
@@ -296,6 +302,9 @@ static void bdrv_co_drain_bh_cb(void *opaque)
bdrv_do_drained_end(bs, data->recursive, data->parent,
data->ignore_bds_parents);
}
+ if (ctx == co_ctx) {
+ aio_context_release(ctx);
+ }
} else {
assert(data->begin);
bdrv_drain_all_begin();
@@ -496,10 +505,6 @@ static bool bdrv_drain_all_poll(void)
BlockDriverState *bs = NULL;
bool result = false;
- /* Execute pending BHs first (may modify the graph) and check everything
- * else only after the BHs have executed. */
- while (aio_poll(qemu_get_aio_context(), false));
-
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
while ((bs = bdrv_next_all_states(bs))) {
@@ -550,7 +555,7 @@ void bdrv_drain_all_begin(void)
}
/* Now poll the in-flight requests */
- AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+ AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
@@ -706,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs)
void bdrv_wakeup(BlockDriverState *bs)
{
- aio_wait_kick(bdrv_get_aio_wait(bs));
- aio_wait_kick(&drain_all_aio_wait);
+ aio_wait_kick();
}
void bdrv_dec_in_flight(BlockDriverState *bs)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 19eb922fdd..217ce60138 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
+ aio_context_acquire(s->aio_context);
qemu_laio_process_completions(s);
- aio_context_acquire(s->aio_context);
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
diff --git a/block/mirror.c b/block/mirror.c
index b8941db6c1..56d9ef7474 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -79,6 +79,7 @@ typedef struct MirrorBlockJob {
int max_iov;
bool initial_zeroing_ongoing;
int in_active_write_counter;
+ bool prepared;
} MirrorBlockJob;
typedef struct MirrorBDSOpaque {
@@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s)
}
}
-static void mirror_exit(Job *job)
+/**
+ * mirror_exit_common: handle both abort() and prepare() cases.
+ * for .prepare, returns 0 on success and -errno on failure.
+ * for .abort cases, denoted by abort = true, MUST return 0.
+ */
+static int mirror_exit_common(Job *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
BlockJob *bjob = &s->common;
@@ -617,7 +623,13 @@ static void mirror_exit(Job *job)
BlockDriverState *target_bs = blk_bs(s->target);
BlockDriverState *mirror_top_bs = s->mirror_top_bs;
Error *local_err = NULL;
- int ret = job->ret;
+ bool abort = job->ret < 0;
+ int ret = 0;
+
+ if (s->prepared) {
+ return 0;
+ }
+ s->prepared = true;
bdrv_release_dirty_bitmap(src, s->dirty_bitmap);
@@ -642,7 +654,7 @@ static void mirror_exit(Job *job)
* required before it could become a backing file of target_bs. */
bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
&error_abort);
- if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) {
bdrv_set_backing_hd(target_bs, backing, &local_err);
@@ -658,11 +670,8 @@ static void mirror_exit(Job *job)
aio_context_acquire(replace_aio_context);
}
- if (s->should_complete && ret == 0) {
- BlockDriverState *to_replace = src;
- if (s->to_replace) {
- to_replace = s->to_replace;
- }
+ if (s->should_complete && !abort) {
+ BlockDriverState *to_replace = s->to_replace ?: src;
if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
@@ -711,7 +720,18 @@ static void mirror_exit(Job *job)
bdrv_unref(mirror_top_bs);
bdrv_unref(src);
- job->ret = ret;
+ return ret;
+}
+
+static int mirror_prepare(Job *job)
+{
+ return mirror_exit_common(job);
+}
+
+static void mirror_abort(Job *job)
+{
+ int ret = mirror_exit_common(job);
+ assert(ret == 0);
}
static void mirror_throttle(MirrorBlockJob *s)
@@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = mirror_run,
- .exit = mirror_exit,
+ .prepare = mirror_prepare,
+ .abort = mirror_abort,
.pause = mirror_pause,
.complete = mirror_complete,
},
@@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = {
.user_resume = block_job_user_resume,
.drain = block_job_drain,
.run = mirror_run,
- .exit = mirror_exit,
+ .prepare = mirror_prepare,
+ .abort = mirror_abort,
.pause = mirror_pause,
.complete = mirror_complete,
},
@@ -1639,7 +1661,8 @@ fail:
void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
- int64_t speed, uint32_t granularity, int64_t buf_size,
+ int creation_flags, int64_t speed,
+ uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
@@ -1655,7 +1678,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
- mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces,
+ mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, NULL, NULL,
&mirror_job_driver, is_none_mode, base, false,
diff --git a/block/stream.c b/block/stream.c
index 67e1e72e23..81a7ec8ece 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
}
-static void stream_exit(Job *job)
+static int stream_prepare(Job *job)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockJob *bjob = &s->common;
BlockDriverState *bs = blk_bs(bjob->blk);
BlockDriverState *base = s->base;
Error *local_err = NULL;
- int ret = job->ret;
+ int ret = 0;
- if (!job_is_cancelled(job) && bs->backing && ret == 0) {
+ if (bs->backing) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
base_id = s->backing_file_str;
@@ -75,12 +75,19 @@ static void stream_exit(Job *job)
bdrv_set_backing_hd(bs, base, &local_err);
if (local_err) {
error_report_err(local_err);
- ret = -EPERM;
- goto out;
+ return -EPERM;
}
}
-out:
+ return ret;
+}
+
+static void stream_clean(Job *job)
+{
+ StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
+ BlockJob *bjob = &s->common;
+ BlockDriverState *bs = blk_bs(bjob->blk);
+
/* Reopen the image back in read-only mode if necessary */
if (s->bs_flags != bdrv_get_flags(bs)) {
/* Give up write permissions before making it read-only */
@@ -89,7 +96,6 @@ out:
}
g_free(s->backing_file_str);
- job->ret = ret;
}
static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = {
.job_type = JOB_TYPE_STREAM,
.free = block_job_free,
.run = stream_run,
- .exit = stream_exit,
+ .prepare = stream_prepare,
+ .clean = stream_clean,
.user_resume = block_job_user_resume,
.drain = block_job_drain,
},
@@ -214,7 +221,8 @@ static const BlockJobDriver stream_job_driver = {
void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
- int64_t speed, BlockdevOnError on_error, Error **errp)
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error, Error **errp)
{
StreamBlockJob *s;
BlockDriverState *iter;
@@ -236,7 +244,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BLK_PERM_GRAPH_MOD,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
BLK_PERM_WRITE,
- speed, JOB_DEFAULT, NULL, NULL, errp);
+ speed, creation_flags, NULL, NULL, errp);
if (!s) {
goto fail;
}