diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-09-25 16:47:35 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-09-25 16:47:35 +0100 |
commit | c5e4e49258e9b89cb34c085a419dd9f862935c48 (patch) | |
tree | aa7547e348af2bf5070321c329e2dbe3daf6290a /block | |
parent | 0a736f7ab83df26dbe5421bb3d1f7892def05dee (diff) | |
parent | 9c76ff9c16be890e70fce30754b096ff9950d1ee (diff) |
Merge remote-tracking branch 'remotes/xanclic/tags/pull-block-2018-09-25' into staging
Block layer patches:
- Drain fixes
- node-name parameters for block-commit
- Refactor block jobs to use transactional callbacks for exiting
# gpg: Signature made Tue 25 Sep 2018 16:12:44 BST
# gpg: using RSA key F407DB0061D5CF40
# gpg: Good signature from "Max Reitz <mreitz@redhat.com>"
# Primary key fingerprint: 91BE B60A 30DB 3E88 57D1 1829 F407 DB00 61D5 CF40
* remotes/xanclic/tags/pull-block-2018-09-25: (42 commits)
test-bdrv-drain: Test draining job source child and parent
block: Use a single global AioWait
test-bdrv-drain: Fix outdated comments
test-bdrv-drain: AIO_WAIT_WHILE() in job .commit/.abort
job: Avoid deadlocks in job_completed_txn_abort()
test-bdrv-drain: Test nested poll in bdrv_drain_poll_top_level()
block: Remove aio_poll() in bdrv_drain_poll variants
blockjob: Lie better in child_job_drained_poll()
block-backend: Decrease in_flight only after callback
block-backend: Fix potential double blk_delete()
block-backend: Add .drained_poll callback
block: Add missing locking in bdrv_co_drain_bh_cb()
test-bdrv-drain: Test AIO_WAIT_WHILE() in completion callback
job: Use AIO_WAIT_WHILE() in job_finish_sync()
test-blockjob: Acquire AioContext around job_cancel_sync()
test-bdrv-drain: Drain with block jobs in an I/O thread
aio-wait: Increase num_waiters even in home thread
blockjob: Wake up BDS when job becomes idle
job: Fix missing locking due to mismerge
job: Fix nested aio_poll() hanging in job_txn_apply
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/block-backend.c | 31 | ||||
-rw-r--r-- | block/commit.c | 97 | ||||
-rw-r--r-- | block/io.c | 30 | ||||
-rw-r--r-- | block/linux-aio.c | 2 | ||||
-rw-r--r-- | block/mirror.c | 49 | ||||
-rw-r--r-- | block/stream.c | 28 |
6 files changed, 148 insertions, 89 deletions
diff --git a/block/block-backend.c b/block/block-backend.c index 14a1b7ac6a..7b1ec5071b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -88,7 +88,6 @@ struct BlockBackend { * Accessed with atomic ops. */ unsigned int in_flight; - AioWait wait; }; typedef struct BlockBackendAIOCB { @@ -121,6 +120,7 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options, abort(); } static void blk_root_drained_begin(BdrvChild *child); +static bool blk_root_drained_poll(BdrvChild *child); static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); @@ -294,6 +294,7 @@ static const BdrvChildRole child_root = { .get_parent_desc = blk_root_get_parent_desc, .drained_begin = blk_root_drained_begin, + .drained_poll = blk_root_drained_poll, .drained_end = blk_root_drained_end, .activate = blk_root_activate, @@ -433,6 +434,7 @@ int blk_get_refcnt(BlockBackend *blk) */ void blk_ref(BlockBackend *blk) { + assert(blk->refcnt > 0); blk->refcnt++; } @@ -445,7 +447,13 @@ void blk_unref(BlockBackend *blk) { if (blk) { assert(blk->refcnt > 0); - if (!--blk->refcnt) { + if (blk->refcnt > 1) { + blk->refcnt--; + } else { + blk_drain(blk); + /* blk_drain() cannot resurrect blk, nobody held a reference */ + assert(blk->refcnt == 1); + blk->refcnt = 0; blk_delete(blk); } } @@ -1289,7 +1297,7 @@ static void blk_inc_in_flight(BlockBackend *blk) static void blk_dec_in_flight(BlockBackend *blk) { atomic_dec(&blk->in_flight); - aio_wait_kick(&blk->wait); + aio_wait_kick(); } static void error_callback_bh(void *opaque) @@ -1330,8 +1338,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { static void blk_aio_complete(BlkAioEmAIOCB *acb) { if (acb->has_returned) { - blk_dec_in_flight(acb->rwco.blk); acb->common.cb(acb->common.opaque, acb->rwco.ret); + blk_dec_in_flight(acb->rwco.blk); qemu_aio_unref(acb); } } @@ -1590,9 +1598,8 @@ void blk_drain(BlockBackend *blk) } /* We may have -ENOMEDIUM completions in flight */ - AIO_WAIT_WHILE(&blk->wait, - blk_get_aio_context(blk), - atomic_mb_read(&blk->in_flight) > 0); + AIO_WAIT_WHILE(blk_get_aio_context(blk), + atomic_mb_read(&blk->in_flight) > 0); if (bs) { bdrv_drained_end(bs); @@ -1611,8 +1618,7 @@ void blk_drain_all(void) aio_context_acquire(ctx); /* We may have -ENOMEDIUM completions in flight */ - AIO_WAIT_WHILE(&blk->wait, ctx, - atomic_mb_read(&blk->in_flight) > 0); + AIO_WAIT_WHILE(ctx, atomic_mb_read(&blk->in_flight) > 0); aio_context_release(ctx); } @@ -2189,6 +2195,13 @@ static void blk_root_drained_begin(BdrvChild *child) } } +static bool blk_root_drained_poll(BdrvChild *child) +{ + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); + return !!blk->in_flight; +} + static void blk_root_drained_end(BdrvChild *child) { BlockBackend *blk = child->opaque; diff --git a/block/commit.c b/block/commit.c index da69165de3..a2da5740b0 100644 --- a/block/commit.c +++ b/block/commit.c @@ -36,6 +36,7 @@ typedef struct CommitBlockJob { BlockDriverState *commit_top_bs; BlockBackend *top; BlockBackend *base; + BlockDriverState *base_bs; BlockdevOnError on_error; int base_flags; char *backing_file_str; @@ -68,61 +69,67 @@ static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, return 0; } -static void commit_exit(Job *job) +static int commit_prepare(Job *job) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); - BlockJob *bjob = &s->common; - BlockDriverState *top = blk_bs(s->top); - BlockDriverState *base = blk_bs(s->base); - BlockDriverState *commit_top_bs = s->commit_top_bs; - bool remove_commit_top_bs = false; - - /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ - bdrv_ref(top); - bdrv_ref(commit_top_bs); /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before * the normal backing chain can be restored. */ blk_unref(s->base); + s->base = NULL; + + /* FIXME: bdrv_drop_intermediate treats total failures and partial failures + * identically. Further work is needed to disambiguate these cases. */ + return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs, + s->backing_file_str); +} - if (!job_is_cancelled(job) && job->ret == 0) { - /* success */ - job->ret = bdrv_drop_intermediate(s->commit_top_bs, base, - s->backing_file_str); - } else { - /* XXX Can (or should) we somehow keep 'consistent read' blocked even - * after the failed/cancelled commit job is gone? If we already wrote - * something to base, the intermediate images aren't valid any more. */ - remove_commit_top_bs = true; +static void commit_abort(Job *job) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + BlockDriverState *top_bs = blk_bs(s->top); + + /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */ + bdrv_ref(top_bs); + bdrv_ref(s->commit_top_bs); + + if (s->base) { + blk_unref(s->base); } + /* free the blockers on the intermediate nodes so that bdrv_replace_nodes + * can succeed */ + block_job_remove_all_bdrv(&s->common); + + /* If bdrv_drop_intermediate() failed (or was not invoked), remove the + * commit filter driver from the backing chain now. Do this as the final + * step so that the 'consistent read' permission can be granted. + * + * XXX Can (or should) we somehow keep 'consistent read' blocked even + * after the failed/cancelled commit job is gone? If we already wrote + * something to base, the intermediate images aren't valid any more. */ + bdrv_child_try_set_perm(s->commit_top_bs->backing, 0, BLK_PERM_ALL, + &error_abort); + bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs), + &error_abort); + + bdrv_unref(s->commit_top_bs); + bdrv_unref(top_bs); +} + +static void commit_clean(Job *job) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); + /* restore base open flags here if appropriate (e.g., change the base back * to r/o). These reopens do not need to be atomic, since we won't abort * even on failure here */ - if (s->base_flags != bdrv_get_flags(base)) { - bdrv_reopen(base, s->base_flags, NULL); + if (s->base_flags != bdrv_get_flags(s->base_bs)) { + bdrv_reopen(s->base_bs, s->base_flags, NULL); } + g_free(s->backing_file_str); blk_unref(s->top); - - /* If there is more than one reference to the job (e.g. if called from - * job_finish_sync()), job_completed() won't free it and therefore the - * blockers on the intermediate nodes remain. This would cause - * bdrv_set_backing_hd() to fail. */ - block_job_remove_all_bdrv(bjob); - - /* If bdrv_drop_intermediate() didn't already do that, remove the commit - * filter driver from the backing chain. Do this as the final step so that - * the 'consistent read' permission can be granted. */ - if (remove_commit_top_bs) { - bdrv_child_try_set_perm(commit_top_bs->backing, 0, BLK_PERM_ALL, - &error_abort); - bdrv_replace_node(commit_top_bs, backing_bs(commit_top_bs), - &error_abort); - } - - bdrv_unref(commit_top_bs); - bdrv_unref(top); } static int coroutine_fn commit_run(Job *job, Error **errp) @@ -211,7 +218,9 @@ static const BlockJobDriver commit_job_driver = { .user_resume = block_job_user_resume, .drain = block_job_drain, .run = commit_run, - .exit = commit_exit, + .prepare = commit_prepare, + .abort = commit_abort, + .clean = commit_clean }, }; @@ -249,7 +258,8 @@ static BlockDriver bdrv_commit_top = { }; void commit_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, BlockDriverState *top, int64_t speed, + BlockDriverState *base, BlockDriverState *top, + int creation_flags, int64_t speed, BlockdevOnError on_error, const char *backing_file_str, const char *filter_node_name, Error **errp) { @@ -267,7 +277,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, } s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, - speed, JOB_DEFAULT, NULL, NULL, errp); + speed, creation_flags, NULL, NULL, errp); if (!s) { return; } @@ -344,6 +354,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, if (ret < 0) { goto fail; } + s->base_bs = base; /* Required permissions are already taken with block_job_add_bdrv() */ s->top = blk_new(0, BLK_PERM_ALL); diff --git a/block/io.c b/block/io.c index 7100344c7b..bd9d688f8b 100644 --- a/block/io.c +++ b/block/io.c @@ -38,8 +38,6 @@ /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) -static AioWait drain_all_aio_wait; - static void bdrv_parent_cb_resize(BlockDriverState *bs); static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, BdrvRequestFlags flags); @@ -268,10 +266,6 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, BdrvChild *ignore_parent) { - /* Execute pending BHs first and check everything else only after the BHs - * have executed. */ - while (aio_poll(bs->aio_context, false)); - return bdrv_drain_poll(bs, recursive, ignore_parent, false); } @@ -288,6 +282,18 @@ static void bdrv_co_drain_bh_cb(void *opaque) BlockDriverState *bs = data->bs; if (bs) { + AioContext *ctx = bdrv_get_aio_context(bs); + AioContext *co_ctx = qemu_coroutine_get_aio_context(co); + + /* + * When the coroutine yielded, the lock for its home context was + * released, so we need to re-acquire it here. If it explicitly + * acquired a different context, the lock is still held and we don't + * want to lock it a second time (or AIO_WAIT_WHILE() would hang). + */ + if (ctx == co_ctx) { + aio_context_acquire(ctx); + } bdrv_dec_in_flight(bs); if (data->begin) { bdrv_do_drained_begin(bs, data->recursive, data->parent, @@ -296,6 +302,9 @@ static void bdrv_co_drain_bh_cb(void *opaque) bdrv_do_drained_end(bs, data->recursive, data->parent, data->ignore_bds_parents); } + if (ctx == co_ctx) { + aio_context_release(ctx); + } } else { assert(data->begin); bdrv_drain_all_begin(); @@ -496,10 +505,6 @@ static bool bdrv_drain_all_poll(void) BlockDriverState *bs = NULL; bool result = false; - /* Execute pending BHs first (may modify the graph) and check everything - * else only after the BHs have executed. */ - while (aio_poll(qemu_get_aio_context(), false)); - /* bdrv_drain_poll() can't make changes to the graph and we are holding the * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ while ((bs = bdrv_next_all_states(bs))) { @@ -550,7 +555,7 @@ void bdrv_drain_all_begin(void) } /* Now poll the in-flight requests */ - AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll()); + AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll()); while ((bs = bdrv_next_all_states(bs))) { bdrv_drain_assert_idle(bs); @@ -706,8 +711,7 @@ void bdrv_inc_in_flight(BlockDriverState *bs) void bdrv_wakeup(BlockDriverState *bs) { - aio_wait_kick(bdrv_get_aio_wait(bs)); - aio_wait_kick(&drain_all_aio_wait); + aio_wait_kick(); } void bdrv_dec_in_flight(BlockDriverState *bs) diff --git a/block/linux-aio.c b/block/linux-aio.c index 19eb922fdd..217ce60138 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -234,9 +234,9 @@ static void qemu_laio_process_completions(LinuxAioState *s) static void qemu_laio_process_completions_and_submit(LinuxAioState *s) { + aio_context_acquire(s->aio_context); qemu_laio_process_completions(s); - aio_context_acquire(s->aio_context); if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { ioq_submit(s); } diff --git a/block/mirror.c b/block/mirror.c index b8941db6c1..56d9ef7474 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -79,6 +79,7 @@ typedef struct MirrorBlockJob { int max_iov; bool initial_zeroing_ongoing; int in_active_write_counter; + bool prepared; } MirrorBlockJob; typedef struct MirrorBDSOpaque { @@ -607,7 +608,12 @@ static void mirror_wait_for_all_io(MirrorBlockJob *s) } } -static void mirror_exit(Job *job) +/** + * mirror_exit_common: handle both abort() and prepare() cases. + * for .prepare, returns 0 on success and -errno on failure. + * for .abort cases, denoted by abort = true, MUST return 0. + */ +static int mirror_exit_common(Job *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); BlockJob *bjob = &s->common; @@ -617,7 +623,13 @@ static void mirror_exit(Job *job) BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *mirror_top_bs = s->mirror_top_bs; Error *local_err = NULL; - int ret = job->ret; + bool abort = job->ret < 0; + int ret = 0; + + if (s->prepared) { + return 0; + } + s->prepared = true; bdrv_release_dirty_bitmap(src, s->dirty_bitmap); @@ -642,7 +654,7 @@ static void mirror_exit(Job *job) * required before it could become a backing file of target_bs. */ bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL, &error_abort); - if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { + if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { BlockDriverState *backing = s->is_none_mode ? src : s->base; if (backing_bs(target_bs) != backing) { bdrv_set_backing_hd(target_bs, backing, &local_err); @@ -658,11 +670,8 @@ static void mirror_exit(Job *job) aio_context_acquire(replace_aio_context); } - if (s->should_complete && ret == 0) { - BlockDriverState *to_replace = src; - if (s->to_replace) { - to_replace = s->to_replace; - } + if (s->should_complete && !abort) { + BlockDriverState *to_replace = s->to_replace ?: src; if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) { bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL); @@ -711,7 +720,18 @@ static void mirror_exit(Job *job) bdrv_unref(mirror_top_bs); bdrv_unref(src); - job->ret = ret; + return ret; +} + +static int mirror_prepare(Job *job) +{ + return mirror_exit_common(job); +} + +static void mirror_abort(Job *job) +{ + int ret = mirror_exit_common(job); + assert(ret == 0); } static void mirror_throttle(MirrorBlockJob *s) @@ -1132,7 +1152,8 @@ static const BlockJobDriver mirror_job_driver = { .user_resume = block_job_user_resume, .drain = block_job_drain, .run = mirror_run, - .exit = mirror_exit, + .prepare = mirror_prepare, + .abort = mirror_abort, .pause = mirror_pause, .complete = mirror_complete, }, @@ -1149,7 +1170,8 @@ static const BlockJobDriver commit_active_job_driver = { .user_resume = block_job_user_resume, .drain = block_job_drain, .run = mirror_run, - .exit = mirror_exit, + .prepare = mirror_prepare, + .abort = mirror_abort, .pause = mirror_pause, .complete = mirror_complete, }, @@ -1639,7 +1661,8 @@ fail: void mirror_start(const char *job_id, BlockDriverState *bs, BlockDriverState *target, const char *replaces, - int64_t speed, uint32_t granularity, int64_t buf_size, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, @@ -1655,7 +1678,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, } is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; - mirror_start_job(job_id, bs, JOB_DEFAULT, target, replaces, + mirror_start_job(job_id, bs, creation_flags, target, replaces, speed, granularity, buf_size, backing_mode, on_source_error, on_target_error, unmap, NULL, NULL, &mirror_job_driver, is_none_mode, base, false, diff --git a/block/stream.c b/block/stream.c index 67e1e72e23..81a7ec8ece 100644 --- a/block/stream.c +++ b/block/stream.c @@ -54,16 +54,16 @@ static int coroutine_fn stream_populate(BlockBackend *blk, return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ); } -static void stream_exit(Job *job) +static int stream_prepare(Job *job) { StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); BlockJob *bjob = &s->common; BlockDriverState *bs = blk_bs(bjob->blk); BlockDriverState *base = s->base; Error *local_err = NULL; - int ret = job->ret; + int ret = 0; - if (!job_is_cancelled(job) && bs->backing && ret == 0) { + if (bs->backing) { const char *base_id = NULL, *base_fmt = NULL; if (base) { base_id = s->backing_file_str; @@ -75,12 +75,19 @@ static void stream_exit(Job *job) bdrv_set_backing_hd(bs, base, &local_err); if (local_err) { error_report_err(local_err); - ret = -EPERM; - goto out; + return -EPERM; } } -out: + return ret; +} + +static void stream_clean(Job *job) +{ + StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); + BlockJob *bjob = &s->common; + BlockDriverState *bs = blk_bs(bjob->blk); + /* Reopen the image back in read-only mode if necessary */ if (s->bs_flags != bdrv_get_flags(bs)) { /* Give up write permissions before making it read-only */ @@ -89,7 +96,6 @@ out: } g_free(s->backing_file_str); - job->ret = ret; } static int coroutine_fn stream_run(Job *job, Error **errp) @@ -206,7 +212,8 @@ static const BlockJobDriver stream_job_driver = { .job_type = JOB_TYPE_STREAM, .free = block_job_free, .run = stream_run, - .exit = stream_exit, + .prepare = stream_prepare, + .clean = stream_clean, .user_resume = block_job_user_resume, .drain = block_job_drain, }, @@ -214,7 +221,8 @@ static const BlockJobDriver stream_job_driver = { void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, const char *backing_file_str, - int64_t speed, BlockdevOnError on_error, Error **errp) + int creation_flags, int64_t speed, + BlockdevOnError on_error, Error **errp) { StreamBlockJob *s; BlockDriverState *iter; @@ -236,7 +244,7 @@ void stream_start(const char *job_id, BlockDriverState *bs, BLK_PERM_GRAPH_MOD, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, - speed, JOB_DEFAULT, NULL, NULL, errp); + speed, creation_flags, NULL, NULL, errp); if (!s) { goto fail; } |