aboutsummaryrefslogtreecommitdiff
path: root/include/block
diff options
context:
space:
mode:
Diffstat (limited to 'include/block')
-rw-r--r-- include/block/aio-wait.h | 116
-rw-r--r-- include/block/aio.h | 7
-rw-r--r-- include/block/block.h | 54
-rw-r--r-- include/block/block_int.h | 61
4 files changed, 171 insertions, 67 deletions
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
new file mode 100644
index 0000000000..a48c744fa8
--- /dev/null
+++ b/include/block/aio-wait.h
@@ -0,0 +1,116 @@
+/*
+ * AioContext wait support
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_AIO_WAIT_H
+#define QEMU_AIO_WAIT_H
+
+#include "block/aio.h"
+
+/**
+ * AioWait:
+ *
+ * An object that facilitates synchronous waiting on a condition. The main
+ * loop can wait on an operation running in an IOThread as follows:
+ *
+ * AioWait *wait = ...;
+ * AioContext *ctx = ...;
+ * MyWork work = { .done = false };
+ * schedule_my_work_in_iothread(ctx, &work);
+ * AIO_WAIT_WHILE(wait, ctx, !work.done);
+ *
+ * The IOThread must call aio_wait_kick() to notify the main loop when
+ * work.done changes:
+ *
+ * static void do_work(...)
+ * {
+ * ...
+ * work.done = true;
+ * aio_wait_kick(wait);
+ * }
+ */
+typedef struct {
+ /* Is the main loop waiting for a kick? Accessed with atomic ops. */
+ bool need_kick;
+} AioWait;
+
+/**
+ * AIO_WAIT_WHILE:
+ * @wait: the aio wait object
+ * @ctx: the aio context
+ * @cond: wait while this conditional expression is true
+ *
+ * Wait while a condition is true. Use this to implement synchronous
+ * operations that require event loop activity.
+ *
+ * The caller must be sure that something calls aio_wait_kick() when the value
+ * of @cond might have changed.
+ *
+ * The caller's thread must be the IOThread that owns @ctx or the main loop
+ * thread (with @ctx acquired exactly once). This function cannot be used to
+ * wait on conditions between two IOThreads since that could lead to deadlock;
+ * go via the main loop instead.
+ */
+#define AIO_WAIT_WHILE(wait, ctx, cond) ({ \
+ bool waited_ = false; \
+ bool busy_ = true; \
+ AioWait *wait_ = (wait); \
+ AioContext *ctx_ = (ctx); \
+ if (in_aio_context_home_thread(ctx_)) { \
+ while ((cond) || busy_) { \
+ busy_ = aio_poll(ctx_, (cond)); \
+ waited_ |= !!(cond) | busy_; \
+ } \
+ } else { \
+ assert(qemu_get_current_aio_context() == \
+ qemu_get_aio_context()); \
+ assert(!wait_->need_kick); \
+ /* Set wait_->need_kick before evaluating cond. */ \
+ atomic_mb_set(&wait_->need_kick, true); \
+ while (busy_) { \
+ if ((cond)) { \
+ waited_ = busy_ = true; \
+ aio_context_release(ctx_); \
+ aio_poll(qemu_get_aio_context(), true); \
+ aio_context_acquire(ctx_); \
+ } else { \
+ busy_ = aio_poll(ctx_, false); \
+ waited_ |= busy_; \
+ } \
+ } \
+ atomic_set(&wait_->need_kick, false); \
+ } \
+ waited_; })
+
+/**
+ * aio_wait_kick:
+ * @wait: the aio wait object that should re-evaluate its condition
+ *
+ * Wake up the main thread if it is waiting on AIO_WAIT_WHILE(). During
+ * synchronous operations performed in an IOThread, the main thread lets the
+ * IOThread's event loop run, waiting for the operation to complete. An
+ * aio_wait_kick() call will wake up the main thread.
+ */
+void aio_wait_kick(AioWait *wait);
+
+#endif /* QEMU_AIO_WAIT_H */
diff --git a/include/block/aio.h b/include/block/aio.h
index e9aeeaec94..a1d6b9e249 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -534,11 +534,14 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
AioContext *qemu_get_current_aio_context(void);
/**
+ * in_aio_context_home_thread:
* @ctx: the aio context
*
- * Return whether we are running in the I/O thread that manages @ctx.
+ * Return whether we are running in the thread that normally runs @ctx. Note
+ * that acquiring/releasing ctx does not affect the outcome; each AioContext
+ * still only has one home thread that is responsible for running it.
*/
-static inline bool aio_context_in_iothread(AioContext *ctx)
+static inline bool in_aio_context_home_thread(AioContext *ctx)
{
return ctx == qemu_get_current_aio_context();
}
diff --git a/include/block/block.h b/include/block/block.h
index fac401ba3e..8b6db952a2 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -3,6 +3,7 @@
#include "block/aio.h"
#include "qapi/qapi-types-block-core.h"
+#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
@@ -115,19 +116,19 @@ typedef struct HDGeometry {
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer (short for DATA || ZERO), set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
*
* Internal flag:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
*
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
- * the return value (old interface) or the entire map parameter (new
- * interface) represent the offset in the returned BDS that is allocated for
- * the corresponding raw data. However, whether that offset actually
- * contains data also depends on BDRV_BLOCK_DATA, as follows:
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
@@ -367,41 +368,14 @@ void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
+/* Returns NULL when bs == NULL */
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
+
#define BDRV_POLL_WHILE(bs, cond) ({ \
- bool waited_ = false; \
- bool busy_ = true; \
BlockDriverState *bs_ = (bs); \
- AioContext *ctx_ = bdrv_get_aio_context(bs_); \
- if (aio_context_in_iothread(ctx_)) { \
- while ((cond) || busy_) { \
- busy_ = aio_poll(ctx_, (cond)); \
- waited_ |= !!(cond) | busy_; \
- } \
- } else { \
- assert(qemu_get_current_aio_context() == \
- qemu_get_aio_context()); \
- /* Ask bdrv_dec_in_flight to wake up the main \
- * QEMU AioContext. Extra I/O threads never take \
- * other I/O threads' AioContexts (see for example \
- * block_job_defer_to_main_loop for how to do it). \
- */ \
- assert(!bs_->wakeup); \
- /* Set bs->wakeup before evaluating cond. */ \
- atomic_mb_set(&bs_->wakeup, true); \
- while (busy_) { \
- if ((cond)) { \
- waited_ = busy_ = true; \
- aio_context_release(ctx_); \
- aio_poll(qemu_get_aio_context(), true); \
- aio_context_acquire(ctx_); \
- } else { \
- busy_ = aio_poll(ctx_, false); \
- waited_ |= busy_; \
- } \
- } \
- atomic_set(&bs_->wakeup, false); \
- } \
- waited_; })
+ AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
+ bdrv_get_aio_context(bs_), \
+ cond); })
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 5ea63f8fa8..64a5700f2b 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
#include "block/accounting.h"
#include "block/block.h"
+#include "block/aio-wait.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#include "qemu/stats64.h"
@@ -128,7 +129,8 @@ struct BlockDriver {
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
Error **errp);
void (*bdrv_close)(BlockDriverState *bs);
- int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
+ int coroutine_fn (*bdrv_co_create_opts)(const char *filename, QemuOpts *opts,
+ Error **errp);
int (*bdrv_make_empty)(BlockDriverState *bs);
void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
@@ -202,15 +204,22 @@ struct BlockDriver {
/*
* Building block for bdrv_block_status[_above] and
* bdrv_is_allocated[_above]. The driver should answer only
- * according to the current layer, and should not set
- * BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW. See block.h
- * for the meaning of _DATA, _ZERO, and _OFFSET_VALID. The block
- * layer guarantees input aligned to request_alignment, as well as
- * non-NULL pnum and file.
+ * according to the current layer, and should only need to set
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
+ * block.h for the overall meaning of the bits. As a hint, the
+ * flag want_zero is true if the caller cares more about precise
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+ * overall allocation (favor larger *pnum, perhaps by reporting
+ * _DATA instead of _ZERO). The block layer guarantees input
+ * clamped to bdrv_getlength() and aligned to request_alignment,
+ * as well as non-NULL pnum, map, and file; in turn, the driver
+ * must return an error or set pnum to an aligned non-zero value.
*/
- int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum,
- BlockDriverState **file);
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
/*
* Invalidate any cached meta-data.
@@ -709,10 +718,8 @@ struct BlockDriverState {
unsigned int in_flight;
unsigned int serialising_in_flight;
- /* Internal to BDRV_POLL_WHILE and bdrv_wakeup. Accessed with atomic
- * ops.
- */
- bool wakeup;
+ /* Kicked to signal main loop when a request completes. */
+ AioWait wait;
/* counter for nested bdrv_io_plug.
* Accessed with atomic ops.
@@ -1031,23 +1038,27 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared);
/*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
* their file.
*/
-int64_t coroutine_fn bdrv_co_get_block_status_from_file(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
/*
- * Default implementation for drivers to pass bdrv_co_get_block_status() to
+ * Default implementation for drivers to pass bdrv_co_block_status() to
* their backing file.
*/
-int64_t coroutine_fn bdrv_co_get_block_status_from_backing(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- int *pnum,
- BlockDriverState **file);
+int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
+ bool want_zero,
+ int64_t offset,
+ int64_t bytes,
+ int64_t *pnum,
+ int64_t *map,
+ BlockDriverState **file);
const char *bdrv_get_parent_name(const BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);