diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/block/aio-wait.h | 116 | ||||
-rw-r--r-- | include/block/aio.h | 7 | ||||
-rw-r--r-- | include/block/block.h | 54 | ||||
-rw-r--r-- | include/block/block_int.h | 61 |
4 files changed, 171 insertions, 67 deletions
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h new file mode 100644 index 0000000000..a48c744fa8 --- /dev/null +++ b/include/block/aio-wait.h @@ -0,0 +1,116 @@ +/* + * AioContext wait support + * + * Copyright (C) 2018 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_AIO_WAIT_H +#define QEMU_AIO_WAIT_H + +#include "block/aio.h" + +/** + * AioWait: + * + * An object that facilitates synchronous waiting on a condition. The main + * loop can wait on an operation running in an IOThread as follows: + * + * AioWait *wait = ...; + * AioContext *ctx = ...; + * MyWork work = { .done = false }; + * schedule_my_work_in_iothread(ctx, &work); + * AIO_WAIT_WHILE(wait, ctx, !work.done); + * + * The IOThread must call aio_wait_kick() to notify the main loop when + * work.done changes: + * + * static void do_work(...) + * { + * ... + * work.done = true; + * aio_wait_kick(wait); + * } + */ +typedef struct { + /* Is the main loop waiting for a kick? Accessed with atomic ops. */ + bool need_kick; +} AioWait; + +/** + * AIO_WAIT_WHILE: + * @wait: the aio wait object + * @ctx: the aio context + * @cond: wait while this conditional expression is true + * + * Wait while a condition is true. Use this to implement synchronous + * operations that require event loop activity. + * + * The caller must be sure that something calls aio_wait_kick() when the value + * of @cond might have changed. + * + * The caller's thread must be the IOThread that owns @ctx or the main loop + * thread (with @ctx acquired exactly once). This function cannot be used to + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. + */ +#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \ + bool waited_ = false; \ + bool busy_ = true; \ + AioWait *wait_ = (wait); \ + AioContext *ctx_ = (ctx); \ + if (in_aio_context_home_thread(ctx_)) { \ + while ((cond) || busy_) { \ + busy_ = aio_poll(ctx_, (cond)); \ + waited_ |= !!(cond) | busy_; \ + } \ + } else { \ + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + assert(!wait_->need_kick); \ + /* Set wait_->need_kick before evaluating cond. */ \ + atomic_mb_set(&wait_->need_kick, true); \ + while (busy_) { \ + if ((cond)) { \ + waited_ = busy_ = true; \ + aio_context_release(ctx_); \ + aio_poll(qemu_get_aio_context(), true); \ + aio_context_acquire(ctx_); \ + } else { \ + busy_ = aio_poll(ctx_, false); \ + waited_ |= busy_; \ + } \ + } \ + atomic_set(&wait_->need_kick, false); \ + } \ + waited_; }) + +/** + * aio_wait_kick: + * @wait: the aio wait object that should re-evaluate its condition + * + * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During + * synchronous operations performed in an IOThread, the main thread lets the + * IOThread's event loop run, waiting for the operation to complete. A + * aio_wait_kick() call will wake up the main thread. + */ +void aio_wait_kick(AioWait *wait); + +#endif /* QEMU_AIO_WAIT */ diff --git a/include/block/aio.h b/include/block/aio.h index e9aeeaec94..a1d6b9e249 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); AioContext *qemu_get_current_aio_context(void); /** + * in_aio_context_home_thread: * @ctx: the aio context * - * Return whether we are running in the I/O thread that manages @ctx. + * Return whether we are running in the thread that normally runs @ctx. Note + * that acquiring/releasing ctx does not affect the outcome, each AioContext + * still only has one home thread that is responsible for running it. */ -static inline bool aio_context_in_iothread(AioContext *ctx) +static inline bool in_aio_context_home_thread(AioContext *ctx) { return ctx == qemu_get_current_aio_context(); } diff --git a/include/block/block.h b/include/block/block.h index fac401ba3e..8b6db952a2 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -3,6 +3,7 @@ #include "block/aio.h" #include "qapi/qapi-types-block-core.h" +#include "block/aio-wait.h" #include "qemu/iov.h" #include "qemu/coroutine.h" #include "block/accounting.h" @@ -115,19 +116,19 @@ typedef struct HDGeometry { * BDRV_BLOCK_ZERO: offset reads as zero * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this - * layer (short for DATA || ZERO), set by block layer - * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer + * layer rather than any backing, set by block layer + * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this + * layer, set by block layer * * Internal flag: * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request * that the block layer recompute the answer from the returned * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID. * - * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of - * the return value (old interface) or the entire map parameter (new - * interface) represent the offset in the returned BDS that is allocated for - * the corresponding raw data. However, whether that offset actually - * contains data also depends on BDRV_BLOCK_DATA, as follows: + * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the + * host offset within the returned BDS that is allocated for the + * corresponding raw guest data. However, whether that offset + * actually contains data also depends on BDRV_BLOCK_DATA, as follows: * * DATA ZERO OFFSET_VALID * t t t sectors read as zero, returned file is zero at offset @@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void); void bdrv_drain_all_end(void); void bdrv_drain_all(void); +/* Returns NULL when bs == NULL */ +AioWait *bdrv_get_aio_wait(BlockDriverState *bs); + #define BDRV_POLL_WHILE(bs, cond) ({ \ - bool waited_ = false; \ - bool busy_ = true; \ BlockDriverState *bs_ = (bs); \ - AioContext *ctx_ = bdrv_get_aio_context(bs_); \ - if (aio_context_in_iothread(ctx_)) { \ - while ((cond) || busy_) { \ - busy_ = aio_poll(ctx_, (cond)); \ - waited_ |= !!(cond) | busy_; \ - } \ - } else { \ - assert(qemu_get_current_aio_context() == \ - qemu_get_aio_context()); \ - /* Ask bdrv_dec_in_flight to wake up the main \ - * QEMU AioContext. Extra I/O threads never take \ - * other I/O threads' AioContexts (see for example \ - * block_job_defer_to_main_loop for how to do it). \ - */ \ - assert(!bs_->wakeup); \ - /* Set bs->wakeup before evaluating cond. */ \ - atomic_mb_set(&bs_->wakeup, true); \ - while (busy_) { \ - if ((cond)) { \ - waited_ = busy_ = true; \ - aio_context_release(ctx_); \ - aio_poll(qemu_get_aio_context(), true); \ - aio_context_acquire(ctx_); \ - } else { \ - busy_ = aio_poll(ctx_, false); \ - waited_ |= busy_; \ - } \ - } \ - atomic_set(&bs_->wakeup, false); \ - } \ - waited_; }) + AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \ + bdrv_get_aio_context(bs_), \ + cond); }) int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes); diff --git a/include/block/block_int.h b/include/block/block_int.h index 5ea63f8fa8..64a5700f2b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -26,6 +26,7 @@ #include "block/accounting.h" #include "block/block.h" +#include "block/aio-wait.h" #include "qemu/queue.h" #include "qemu/coroutine.h" #include "qemu/stats64.h" @@ -128,7 +129,8 @@ struct BlockDriver { int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, Error **errp); void (*bdrv_close)(BlockDriverState *bs); - int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); + int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts, + Error **errp); int (*bdrv_make_empty)(BlockDriverState *bs); void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); @@ -202,15 +204,22 @@ struct BlockDriver { /* * Building block for bdrv_block_status[_above] and * bdrv_is_allocated[_above]. The driver should answer only - * according to the current layer, and should not set - * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h - * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block - * layer guarantees input aligned to request_alignment, as well as - * non-NULL pnum and file. + * according to the current layer, and should only need to set + * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID, + * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing + * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See + * block.h for the overall meaning of the bits. As a hint, the + * flag want_zero is true if the caller cares more about precise + * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for + * overall allocation (favor larger *pnum, perhaps by reporting + * _DATA instead of _ZERO). The block layer guarantees input + * clamped to bdrv_getlength() and aligned to request_alignment, + * as well as non-NULL pnum, map, and file; in turn, the driver + * must return an error or set pnum to an aligned non-zero value. */ - int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum, - BlockDriverState **file); + int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum, + int64_t *map, BlockDriverState **file); /* * Invalidate any cached meta-data. @@ -709,10 +718,8 @@ struct BlockDriverState { unsigned int in_flight; unsigned int serialising_in_flight; - /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic - * ops. - */ - bool wakeup; + /* Kicked to signal main loop when a request completes. */ + AioWait wait; /* counter for nested bdrv_io_plug. * Accessed with atomic ops. @@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared); /* - * Default implementation for drivers to pass bdrv_co_get_block_status() to + * Default implementation for drivers to pass bdrv_co_block_status() to * their file. */ -int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file); +int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); /* - * Default implementation for drivers to pass bdrv_co_get_block_status() to + * Default implementation for drivers to pass bdrv_co_block_status() to * their backing file. */ -int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, - int *pnum, - BlockDriverState **file); +int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs, + bool want_zero, + int64_t offset, + int64_t bytes, + int64_t *pnum, + int64_t *map, + BlockDriverState **file); const char *bdrv_get_parent_name(const BlockDriverState *bs); void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); bool blk_dev_has_removable_media(BlockBackend *blk); |