aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2022-03-05 10:59:03 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2022-03-05 10:59:04 +0000
commit d7e2fe4aac8b74bbfe82b2309536528b4dbe0d34 (patch)
tree 8599bc6aee66060dbfd9d2cd1955bd1eea3af236
parent 5c8463886d50eeb0337bd121ab877cf692731e36 (diff)
parent 78fa41fc671eae51fd3390a12a041d1c4a241c66 (diff)
Merge remote-tracking branch 'remotes/kwolf-gitlab/tags/for-upstream' into staging
Block layer patches

- qemu-storage-daemon: Add --daemonize
- Fix x-blockdev-amend and block node activation code which incorrectly
  executed code in the iothread that must run in the main thread.
- Add macros for coroutine-safe TLS variables (required for correctness
  with LTO)
- Fix crashes with concurrent I/O and bdrv_refresh_limits()
- Split block APIs in global state and I/O
- iotests: Don't refuse to run at all without GNU sed, just skip tests
  that need it

# gpg: Signature made Fri 04 Mar 2022 17:18:31 GMT
# gpg: using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg: issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74 56FE 7F09 B272 C88F 2FD6

* remotes/kwolf-gitlab/tags/for-upstream: (50 commits)
  block/amend: Keep strong reference to BDS
  block/amend: Always call .bdrv_amend_clean()
  tests/qemu-iotests: Rework the checks and spots using GNU sed
  iotests/graph-changes-while-io: New test
  iotests: Allow using QMP with the QSD
  block: Make bdrv_refresh_limits() non-recursive
  job.h: assertions in the callers of JobDriver function pointers
  job.h: split function pointers in JobDriver
  block-backend-common.h: split function pointers in BlockDevOps
  block_int-common.h: assertions in the callers of BdrvChildClass function pointers
  block_int-common.h: split function pointers in BdrvChildClass
  block_int-common.h: assertions in the callers of BlockDriver function pointers
  block_int-common.h: split function pointers in BlockDriver
  block/coroutines: I/O and "I/O or GS" API
  block/copy-before-write.h: global state API + assertions
  include/block/snapshot: global state API + assertions
  assertions for blockdev.h global state API
  include/sysemu/blockdev.h: global state API
  assertions for blockjob.h global state API
  include/block/blockjob.h: global state API
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- block.c 321
-rw-r--r-- block/amend.c 28
-rw-r--r-- block/backup.c 1
-rw-r--r-- block/block-backend.c 166
-rw-r--r-- block/commit.c 4
-rw-r--r-- block/copy-before-write.c 2
-rw-r--r-- block/copy-before-write.h 7
-rw-r--r-- block/coroutines.h 81
-rw-r--r-- block/create.c 2
-rw-r--r-- block/crypto.c 68
-rw-r--r-- block/dirty-bitmap.c 5
-rw-r--r-- block/export/export.c 2
-rw-r--r-- block/export/fuse.c 25
-rw-r--r-- block/io.c 75
-rw-r--r-- block/meson.build 7
-rw-r--r-- block/mirror.c 4
-rw-r--r-- block/monitor/bitmap-qmp-cmds.c 6
-rw-r--r-- block/nbd.c 1
-rw-r--r-- block/parallels.c 2
-rw-r--r-- block/snapshot.c 28
-rw-r--r-- block/stream.c 2
-rw-r--r-- blockdev.c 29
-rw-r--r-- blockjob.c 16
-rw-r--r-- docs/tools/qemu-storage-daemon.rst 7
-rw-r--r-- hw/block/pflash_cfi01.c 2
-rw-r--r-- hw/nvram/spapr_nvram.c 2
-rw-r--r-- include/block/block-common.h 418
-rw-r--r-- include/block/block-global-state.h 253
-rw-r--r-- include/block/block-io.h 368
-rw-r--r-- include/block/block.h 878
-rw-r--r-- include/block/block_int-common.h 1222
-rw-r--r-- include/block/block_int-global-state.h 329
-rw-r--r-- include/block/block_int-io.h 185
-rw-r--r-- include/block/block_int.h 1475
-rw-r--r-- include/block/blockjob.h 29
-rw-r--r-- include/block/blockjob_int.h 28
-rw-r--r-- include/block/snapshot.h 13
-rw-r--r-- include/qemu/coroutine-tls.h 165
-rw-r--r-- include/qemu/job.h 22
-rw-r--r-- include/qemu/main-loop.h 42
-rw-r--r-- include/qemu/rcu.h 7
-rw-r--r-- include/sysemu/block-backend-common.h 102
-rw-r--r-- include/sysemu/block-backend-global-state.h 116
-rw-r--r-- include/sysemu/block-backend-io.h 161
-rw-r--r-- include/sysemu/block-backend.h 269
-rw-r--r-- include/sysemu/blockdev.h 13
-rw-r--r-- include/sysemu/os-posix.h 1
-rw-r--r-- include/sysemu/os-win32.h 8
-rw-r--r-- job.c 10
-rw-r--r-- migration/block.c 2
-rw-r--r-- migration/migration.c 14
-rw-r--r-- migration/savevm.c 8
-rw-r--r-- monitor/qmp-cmds.c 2
-rw-r--r-- os-posix.c 6
-rw-r--r-- softmmu/cpus.c 14
-rw-r--r-- softmmu/qdev-monitor.c 2
-rw-r--r-- storage-daemon/qemu-storage-daemon.c 58
-rw-r--r-- stubs/iothread-lock-block.c 8
-rw-r--r-- stubs/meson.build 3
-rwxr-xr-x tests/check-block.sh 12
-rwxr-xr-x tests/qemu-iotests/185 190
-rw-r--r-- tests/qemu-iotests/185.out 48
-rwxr-xr-x tests/qemu-iotests/271 2
-rwxr-xr-x tests/qemu-iotests/296 8
-rw-r--r-- tests/qemu-iotests/296.out 17
-rw-r--r-- tests/qemu-iotests/common.filter 65
-rw-r--r-- tests/qemu-iotests/common.rc 45
-rw-r--r-- tests/qemu-iotests/iotests.py 32
-rwxr-xr-x tests/qemu-iotests/tests/graph-changes-while-io 91
-rw-r--r-- tests/qemu-iotests/tests/graph-changes-while-io.out 5
-rw-r--r-- tests/unit/rcutorture.c 10
-rw-r--r-- tests/unit/test-block-iothread.c 8
-rw-r--r-- tests/unit/test-rcu-list.c 4
-rw-r--r-- util/async.c 12
-rw-r--r-- util/rcu.c 10
75 files changed, 4860 insertions, 2823 deletions
diff --git a/block.c b/block.c
index b54d59d1fa..718e4cae8b 100644
--- a/block.c
+++ b/block.c
@@ -67,12 +67,15 @@
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+/* Protected by BQL */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
+/* Protected by BQL */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
+/* Protected by BQL */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
QLIST_HEAD_INITIALIZER(bdrv_drivers);
@@ -134,6 +137,7 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs)
/* page size or 4k (hdd sector size) should be on the safe side */
return MAX(4096, qemu_real_host_page_size);
}
+ IO_CODE();
return bs->bl.opt_mem_alignment;
}
@@ -144,6 +148,7 @@ size_t bdrv_min_mem_align(BlockDriverState *bs)
/* page size or 4k (hdd sector size) should be on the safe side */
return MAX(4096, qemu_real_host_page_size);
}
+ IO_CODE();
return bs->bl.min_mem_alignment;
}
@@ -269,12 +274,15 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
* image is inactivated. */
bool bdrv_is_read_only(BlockDriverState *bs)
{
+ IO_CODE();
return !(bs->open_flags & BDRV_O_RDWR);
}
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp)
{
+ IO_CODE();
+
/* Do not set read_only if copy_on_read is enabled */
if (bs->copy_on_read && read_only) {
error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
@@ -308,6 +316,7 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
Error **errp)
{
int ret = 0;
+ IO_CODE();
if (!(bs->open_flags & BDRV_O_RDWR)) {
return 0;
@@ -384,12 +393,14 @@ static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
}
void bdrv_register(BlockDriver *bdrv)
{
assert(bdrv->format_name);
+ GLOBAL_STATE_CODE();
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
@@ -398,6 +409,8 @@ BlockDriverState *bdrv_new(void)
BlockDriverState *bs;
int i;
+ GLOBAL_STATE_CODE();
+
bs = g_new0(BlockDriverState, 1);
QLIST_INIT(&bs->dirty_bitmaps);
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
@@ -425,6 +438,7 @@ BlockDriverState *bdrv_new(void)
static BlockDriver *bdrv_do_find_format(const char *format_name)
{
BlockDriver *drv1;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
if (!strcmp(drv1->format_name, format_name)) {
@@ -440,6 +454,8 @@ BlockDriver *bdrv_find_format(const char *format_name)
BlockDriver *drv1;
int i;
+ GLOBAL_STATE_CODE();
+
drv1 = bdrv_do_find_format(format_name);
if (drv1) {
return drv1;
@@ -489,6 +505,7 @@ static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
+ GLOBAL_STATE_CODE();
return bdrv_format_is_whitelisted(drv->format_name, read_only);
}
@@ -512,6 +529,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque)
CreateCo *cco = opaque;
assert(cco->drv);
+ GLOBAL_STATE_CODE();
ret = cco->drv->bdrv_co_create_opts(cco->drv,
cco->filename, cco->opts, &local_err);
@@ -524,6 +542,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
{
int ret;
+ GLOBAL_STATE_CODE();
+
Coroutine *co;
CreateCo cco = {
.drv = drv,
@@ -578,6 +598,8 @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
int64_t size;
int ret;
+ GLOBAL_STATE_CODE();
+
ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
&local_err);
if (ret < 0 && ret != -ENOTSUP) {
@@ -616,6 +638,8 @@ static int create_file_fallback_zero_first_sector(BlockBackend *blk,
int64_t bytes_to_clear;
int ret;
+ GLOBAL_STATE_CODE();
+
bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
if (bytes_to_clear) {
ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
@@ -647,6 +671,8 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
Error *local_err = NULL;
int ret;
+ GLOBAL_STATE_CODE();
+
size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
@@ -699,6 +725,8 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
QDict *qdict;
int ret;
+ GLOBAL_STATE_CODE();
+
drv = bdrv_find_protocol(filename, true, errp);
if (drv == NULL) {
return -ENOENT;
@@ -743,6 +771,7 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
Error *local_err = NULL;
int ret;
+ IO_CODE();
assert(bs != NULL);
if (!bs->drv) {
@@ -768,6 +797,7 @@ void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
{
Error *local_err = NULL;
int ret;
+ IO_CODE();
if (!bs) {
return;
@@ -796,6 +826,7 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
+ GLOBAL_STATE_CODE();
if (drv && drv->bdrv_probe_blocksizes) {
return drv->bdrv_probe_blocksizes(bs, bsz);
@@ -816,6 +847,7 @@ int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
+ GLOBAL_STATE_CODE();
if (drv && drv->bdrv_probe_geometry) {
return drv->bdrv_probe_geometry(bs, geo);
@@ -870,6 +902,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
{
int score_max = 0, score;
BlockDriver *drv = NULL, *d;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(d, &bdrv_drivers, list) {
if (d->bdrv_probe_device) {
@@ -887,6 +920,7 @@ static BlockDriver *find_hdev_driver(const char *filename)
static BlockDriver *bdrv_do_find_protocol(const char *protocol)
{
BlockDriver *drv1;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
@@ -907,6 +941,7 @@ BlockDriver *bdrv_find_protocol(const char *filename,
const char *p;
int i;
+ GLOBAL_STATE_CODE();
/* TODO Drivers without bdrv_file_open must be specified explicitly */
/*
@@ -972,6 +1007,7 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
{
int score_max = 0, score;
BlockDriver *drv = NULL, *d;
+ IO_CODE();
QLIST_FOREACH(d, &bdrv_drivers, list) {
if (d->bdrv_probe) {
@@ -993,6 +1029,8 @@ static int find_image_format(BlockBackend *file, const char *filename,
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
+ GLOBAL_STATE_CODE();
+
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
*pdrv = &bdrv_raw;
@@ -1024,6 +1062,7 @@ static int find_image_format(BlockBackend *file, const char *filename,
int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv) {
return -ENOMEDIUM;
@@ -1058,6 +1097,7 @@ int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
static void bdrv_join_options(BlockDriverState *bs, QDict *options,
QDict *old_options)
{
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_join_options) {
bs->drv->bdrv_join_options(options, old_options);
} else {
@@ -1074,6 +1114,7 @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
BlockdevDetectZeroesOptions detect_zeroes =
qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
+ GLOBAL_STATE_CODE();
g_free(value);
if (local_err) {
error_propagate(errp, local_err);
@@ -1189,6 +1230,7 @@ static void bdrv_child_cb_drained_end(BdrvChild *child,
static int bdrv_child_cb_inactivate(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
+ GLOBAL_STATE_CODE();
assert(bs->open_flags & BDRV_O_INACTIVE);
return 0;
}
@@ -1215,6 +1257,7 @@ static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
{
+ GLOBAL_STATE_CODE();
*child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
/* For temporary files, unconditional cache=unsafe is fine */
@@ -1235,6 +1278,7 @@ static void bdrv_backing_attach(BdrvChild *c)
BlockDriverState *parent = c->opaque;
BlockDriverState *backing_hd = c->bs;
+ GLOBAL_STATE_CODE();
assert(!parent->backing_blocker);
error_setg(&parent->backing_blocker,
"node is used as backing hd of '%s'",
@@ -1273,6 +1317,7 @@ static void bdrv_backing_detach(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
+ GLOBAL_STATE_CODE();
assert(parent->backing_blocker);
bdrv_op_unblock_all(c->bs, parent->backing_blocker);
error_free(parent->backing_blocker);
@@ -1285,6 +1330,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
BlockDriverState *parent = c->opaque;
bool read_only = bdrv_is_read_only(parent);
int ret;
+ GLOBAL_STATE_CODE();
if (read_only) {
ret = bdrv_reopen_set_read_only(parent, false, errp);
@@ -1316,6 +1362,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
int parent_flags, QDict *parent_options)
{
int flags = parent_flags;
+ GLOBAL_STATE_CODE();
/*
* First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
@@ -1391,6 +1438,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
+ assert_bdrv_graph_writable(bs);
QLIST_INSERT_HEAD(&bs->children, child, next);
if (child->role & BDRV_CHILD_COW) {
@@ -1410,6 +1458,7 @@ static void bdrv_child_cb_detach(BdrvChild *child)
bdrv_unapply_subtree_drain(child, bs);
+ assert_bdrv_graph_writable(bs);
QLIST_REMOVE(child, next);
}
@@ -1425,6 +1474,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
BlockDriverState *bs = c->opaque;
+ IO_CODE();
return bdrv_get_aio_context(bs);
}
@@ -1447,12 +1497,14 @@ const BdrvChildClass child_of_bds = {
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
{
+ GLOBAL_STATE_CODE();
return c->klass->get_parent_aio_context(c);
}
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
int open_flags = flags;
+ GLOBAL_STATE_CODE();
/*
* Clear flags that are internal to the block layer before opening the
@@ -1465,6 +1517,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
static void update_flags_from_options(int *flags, QemuOpts *opts)
{
+ GLOBAL_STATE_CODE();
+
*flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
@@ -1486,6 +1540,7 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
static void update_options_from_flags(QDict *options, int flags)
{
+ GLOBAL_STATE_CODE();
if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
}
@@ -1507,6 +1562,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs,
Error **errp)
{
char *gen_node_name = NULL;
+ GLOBAL_STATE_CODE();
if (!node_name) {
node_name = gen_node_name = id_generate(ID_BLOCK);
@@ -1551,6 +1607,7 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
{
Error *local_err = NULL;
int i, ret;
+ GLOBAL_STATE_CODE();
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
@@ -1631,6 +1688,8 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
BlockDriverState *bs;
int ret;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_new();
bs->open_flags = flags;
bs->options = options ?: qdict_new();
@@ -1656,6 +1715,7 @@ BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
}
@@ -1750,6 +1810,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
assert(bs->file == NULL);
assert(options != NULL && bs->options != options);
+ GLOBAL_STATE_CODE();
opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
if (!qemu_opts_absorb_qdict(opts, options, errp)) {
@@ -1875,6 +1936,7 @@ static QDict *parse_json_filename(const char *filename, Error **errp)
QObject *options_obj;
QDict *options;
int ret;
+ GLOBAL_STATE_CODE();
ret = strstart(filename, "json:", &filename);
assert(ret);
@@ -1902,6 +1964,7 @@ static void parse_json_protocol(QDict *options, const char **pfilename,
{
QDict *json_options;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
/* Parse json: pseudo-protocol */
if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
@@ -1936,6 +1999,8 @@ static int bdrv_fill_options(QDict **options, const char *filename,
BlockDriver *drv = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
/*
* Caution: while qdict_get_try_str() is fine, getting non-string
* types would require more care. When @options come from
@@ -2057,11 +2122,13 @@ static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
*/
bool bdrv_is_writable(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_is_writable_after_reopen(bs, NULL);
}
static char *bdrv_child_user_desc(BdrvChild *c)
{
+ GLOBAL_STATE_CODE();
return c->klass->get_parent_desc(c);
}
@@ -2078,6 +2145,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
assert(a->bs);
assert(a->bs == b->bs);
+ GLOBAL_STATE_CODE();
if ((b->perm & a->shared_perm) == b->perm) {
return true;
@@ -2101,6 +2169,7 @@ static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
{
BdrvChild *a, *b;
+ GLOBAL_STATE_CODE();
/*
* During the loop we'll look at each pair twice. That's correct because
@@ -2129,6 +2198,7 @@ static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
uint64_t *nperm, uint64_t *nshared)
{
assert(bs->drv && bs->drv->bdrv_child_perm);
+ GLOBAL_STATE_CODE();
bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
parent_perm, parent_shared,
nperm, nshared);
@@ -2155,6 +2225,8 @@ static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
BdrvChild *child;
g_autoptr(GHashTable) local_found = NULL;
+ GLOBAL_STATE_CODE();
+
if (!found) {
assert(!list);
found = local_found = g_hash_table_new(NULL, NULL);
@@ -2182,6 +2254,8 @@ static void bdrv_child_set_perm_abort(void *opaque)
{
BdrvChildSetPermState *s = opaque;
+ GLOBAL_STATE_CODE();
+
s->child->perm = s->old_perm;
s->child->shared_perm = s->old_shared_perm;
}
@@ -2195,6 +2269,7 @@ static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
uint64_t shared, Transaction *tran)
{
BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+ GLOBAL_STATE_CODE();
*s = (BdrvChildSetPermState) {
.child = c,
@@ -2212,6 +2287,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
{
BlockDriverState *bs = opaque;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
if (bs->drv->bdrv_set_perm) {
bdrv_get_cumulative_perm(bs, &cumulative_perms,
@@ -2223,6 +2299,7 @@ static void bdrv_drv_set_perm_commit(void *opaque)
static void bdrv_drv_set_perm_abort(void *opaque)
{
BlockDriverState *bs = opaque;
+ GLOBAL_STATE_CODE();
if (bs->drv->bdrv_abort_perm_update) {
bs->drv->bdrv_abort_perm_update(bs);
@@ -2238,6 +2315,7 @@ static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Transaction *tran,
Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
return 0;
}
@@ -2266,6 +2344,7 @@ typedef struct BdrvReplaceChildState {
static void bdrv_replace_child_commit(void *opaque)
{
BdrvReplaceChildState *s = opaque;
+ GLOBAL_STATE_CODE();
if (s->free_empty_child && !s->child->bs) {
bdrv_child_free(s->child);
@@ -2278,6 +2357,7 @@ static void bdrv_replace_child_abort(void *opaque)
BdrvReplaceChildState *s = opaque;
BlockDriverState *new_bs = s->child->bs;
+ GLOBAL_STATE_CODE();
/*
* old_bs reference is transparently moved from @s to s->child.
*
@@ -2374,6 +2454,7 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
BdrvChild *c;
int ret;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
@@ -2442,6 +2523,7 @@ static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
{
int ret;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
for ( ; list; list = list->next) {
bs = list->data;
@@ -2466,6 +2548,8 @@ void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t cumulative_perms = 0;
uint64_t cumulative_shared_perms = BLK_PERM_ALL;
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
cumulative_perms |= c->perm;
cumulative_shared_perms &= c->shared_perm;
@@ -2509,6 +2593,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
int ret;
Transaction *tran = tran_new();
g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
+ GLOBAL_STATE_CODE();
ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
tran_finalize(tran, ret);
@@ -2523,6 +2608,8 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Transaction *tran = tran_new();
int ret;
+ GLOBAL_STATE_CODE();
+
bdrv_child_set_perm(c, perm, shared, tran);
ret = bdrv_refresh_perms(c->bs, &local_err);
@@ -2553,6 +2640,8 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
uint64_t parent_perms, parent_shared;
uint64_t perms, shared;
+ GLOBAL_STATE_CODE();
+
bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
bdrv_child_perm(bs, c->bs, c, c->role, NULL,
parent_perms, parent_shared, &perms, &shared);
@@ -2571,6 +2660,7 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
+ GLOBAL_STATE_CODE();
*nperm = perm & DEFAULT_PERM_PASSTHROUGH;
*nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
}
@@ -2582,6 +2672,7 @@ static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
uint64_t *nperm, uint64_t *nshared)
{
assert(role & BDRV_CHILD_COW);
+ GLOBAL_STATE_CODE();
/*
* We want consistent read from backing files if the parent needs it.
@@ -2618,6 +2709,7 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
{
int flags;
+ GLOBAL_STATE_CODE();
assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
flags = bdrv_reopen_get_flags(reopen_queue, bs);
@@ -2694,6 +2786,7 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
+ GLOBAL_STATE_CODE();
if (role & BDRV_CHILD_FILTERED) {
assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
BDRV_CHILD_COW)));
@@ -2752,6 +2845,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
assert(!child->frozen);
assert(old_bs != new_bs);
+ GLOBAL_STATE_CODE();
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
@@ -2776,6 +2870,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
if (child->klass->detach) {
child->klass->detach(child);
}
+ assert_bdrv_graph_writable(old_bs);
QLIST_REMOVE(child, next_parent);
}
@@ -2785,6 +2880,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
}
if (new_bs) {
+ assert_bdrv_graph_writable(new_bs);
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
/*
@@ -2827,6 +2923,7 @@ static void bdrv_replace_child_noperm(BdrvChild **childp,
static void bdrv_child_free(BdrvChild *child)
{
assert(!child->bs);
+ GLOBAL_STATE_CODE();
assert(!child->next.le_prev); /* not in children list */
g_free(child->name);
@@ -2845,6 +2942,7 @@ static void bdrv_attach_child_common_abort(void *opaque)
BdrvChild *child = *s->child;
BlockDriverState *bs = child->bs;
+ GLOBAL_STATE_CODE();
/*
* Pass free_empty_child=false, because we still need the child
* for the AioContext operations on the parent below; those
@@ -2907,6 +3005,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
assert(child);
assert(*child == NULL);
assert(child_class->get_parent_desc);
+ GLOBAL_STATE_CODE();
new_child = g_new(BdrvChild, 1);
*new_child = (BdrvChild) {
@@ -2987,6 +3086,7 @@ static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
uint64_t perm, shared_perm;
assert(parent_bs->drv);
+ GLOBAL_STATE_CODE();
if (bdrv_recurse_has_child(child_bs, parent_bs)) {
error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
@@ -3012,6 +3112,7 @@ static void bdrv_detach_child(BdrvChild **childp)
{
BlockDriverState *old_bs = (*childp)->bs;
+ GLOBAL_STATE_CODE();
bdrv_replace_child_noperm(childp, NULL, true);
if (old_bs) {
@@ -3051,6 +3152,8 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
BdrvChild *child = NULL;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
ret = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, opaque,
&child, tran, errp);
@@ -3091,6 +3194,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BdrvChild *child = NULL;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
child_role, &child, tran, errp);
if (ret < 0) {
@@ -3117,6 +3222,8 @@ void bdrv_root_unref_child(BdrvChild *child)
{
BlockDriverState *child_bs;
+ GLOBAL_STATE_CODE();
+
child_bs = child->bs;
bdrv_detach_child(&child);
bdrv_unref(child_bs);
@@ -3191,6 +3298,7 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
/* Callers must ensure that child->frozen is false. */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
+ GLOBAL_STATE_CODE();
if (child == NULL) {
return;
}
@@ -3203,6 +3311,7 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
{
BdrvChild *c;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass->change_media) {
c->klass->change_media(c, load);
@@ -3253,6 +3362,8 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
BdrvChildRole role;
+ GLOBAL_STATE_CODE();
+
if (!parent_bs->drv) {
/*
* Node without drv is an object without a class :/. TODO: finally fix
@@ -3332,6 +3443,7 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
BlockDriverState *backing_hd,
Transaction *tran, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
}
@@ -3341,6 +3453,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
int ret;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
bdrv_drained_begin(bs);
ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
@@ -3380,6 +3493,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
QDict *tmp_parent_options = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
if (bs->backing != NULL) {
goto free_exit;
}
@@ -3539,6 +3654,8 @@ BdrvChild *bdrv_open_child(const char *filename,
{
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
child_role, allow_none, errp);
if (bs == NULL) {
@@ -3561,6 +3678,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
const char *reference = NULL;
Visitor *v = NULL;
+ GLOBAL_STATE_CODE();
+
if (ref->type == QTYPE_QSTRING) {
reference = ref->u.reference;
} else {
@@ -3603,6 +3722,8 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
BlockDriverState *bs_snapshot = NULL;
int ret;
+ GLOBAL_STATE_CODE();
+
/* if snapshot, we create a temporary backing file and open it
instead of opening 'filename' directly */
@@ -3690,6 +3811,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
assert(!child_class || !flags);
assert(!child_class == !parent);
+ GLOBAL_STATE_CODE();
if (reference) {
bool options_non_empty = options ? qdict_size(options) : false;
@@ -3958,6 +4080,8 @@ close_and_fail:
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_open_inherit(filename, reference, options, flags, NULL,
NULL, 0, errp);
}
@@ -4074,6 +4198,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
* important to avoid graph changes between the recursive queuing here and
* bdrv_reopen_multiple(). */
assert(bs->quiesce_counter > 0);
+ GLOBAL_STATE_CODE();
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
@@ -4212,12 +4337,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
NULL, 0, keep_old_opts);
}
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
{
+ GLOBAL_STATE_CODE();
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
@@ -4259,6 +4387,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
assert(bs_queue != NULL);
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
ctx = bdrv_get_aio_context(bs_entry->state.bs);
@@ -4365,6 +4494,8 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
BlockReopenQueue *queue;
int ret;
+ GLOBAL_STATE_CODE();
+
bdrv_subtree_drained_begin(bs);
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
@@ -4386,6 +4517,8 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
{
QDict *opts = qdict_new();
+ GLOBAL_STATE_CODE();
+
qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
return bdrv_reopen(bs, opts, true, errp);
@@ -4420,6 +4553,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
QObject *value;
const char *str;
+ GLOBAL_STATE_CODE();
+
value = qdict_get(reopen_state->options, child_name);
if (value == NULL) {
return 0;
@@ -4518,6 +4653,7 @@ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
assert(reopen_state != NULL);
assert(reopen_state->bs->drv != NULL);
+ GLOBAL_STATE_CODE();
drv = reopen_state->bs->drv;
/* This function and each driver's bdrv_reopen_prepare() remove
@@ -4728,6 +4864,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
bs = reopen_state->bs;
drv = bs->drv;
assert(drv != NULL);
+ GLOBAL_STATE_CODE();
/* If there are any driver level actions to take */
if (drv->bdrv_reopen_commit) {
@@ -4769,6 +4906,7 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
assert(reopen_state != NULL);
drv = reopen_state->bs->drv;
assert(drv != NULL);
+ GLOBAL_STATE_CODE();
if (drv->bdrv_reopen_abort) {
drv->bdrv_reopen_abort(reopen_state);
@@ -4781,6 +4919,7 @@ static void bdrv_close(BlockDriverState *bs)
BdrvAioNotifier *ban, *ban_next;
BdrvChild *child, *next;
+ GLOBAL_STATE_CODE();
assert(!bs->refcnt);
bdrv_drained_begin(bs); /* complete I/O */
@@ -4840,6 +4979,7 @@ static void bdrv_close(BlockDriverState *bs)
void bdrv_close_all(void)
{
assert(job_next(NULL) == NULL);
+ GLOBAL_STATE_CODE();
/* Drop references from requests still in flight, such as canceled block
* jobs whose AIO context has not been polled yet */
@@ -4958,7 +5098,7 @@ static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
{
BdrvRemoveFilterOrCowChild *s = opaque;
-
+ GLOBAL_STATE_CODE();
bdrv_child_free(s->child);
}
@@ -5041,6 +5181,7 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
BdrvChild *c, *next;
assert(to != NULL);
+ GLOBAL_STATE_CODE();
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
@@ -5091,6 +5232,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
BlockDriverState *to_cow_parent = NULL;
int ret;
+ GLOBAL_STATE_CODE();
assert(to != NULL);
if (detach_subchain) {
@@ -5154,11 +5296,15 @@ out:
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_replace_node_common(from, to, true, false, errp);
}
int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
errp);
}
@@ -5181,6 +5327,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
int ret;
Transaction *tran = tran_new();
+ GLOBAL_STATE_CODE();
+
assert(!bs_new->backing);
ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
@@ -5214,6 +5362,8 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
g_autoptr(GSList) refresh_list = NULL;
BlockDriverState *old_bs = child->bs;
+ GLOBAL_STATE_CODE();
+
bdrv_ref(old_bs);
bdrv_drained_begin(old_bs);
bdrv_drained_begin(new_bs);
@@ -5241,6 +5391,7 @@ static void bdrv_delete(BlockDriverState *bs)
{
assert(bdrv_op_blocker_is_empty(bs));
assert(!bs->refcnt);
+ GLOBAL_STATE_CODE();
/* remove from list, if necessary */
if (bs->node_name[0] != '\0') {
@@ -5285,6 +5436,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
node_name = qdict_get_try_str(options, "node-name");
+ GLOBAL_STATE_CODE();
+
new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
errp);
options = NULL; /* bdrv_new_open_driver() eats options */
@@ -5320,6 +5473,7 @@ fail:
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
{
+ IO_CODE();
if (bs->drv == NULL) {
return -ENOMEDIUM;
}
@@ -5345,6 +5499,8 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
BlockDriver *drv = bs->drv;
int ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -5386,6 +5542,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs)
{
+
+ GLOBAL_STATE_CODE();
+
bs = bdrv_skip_filters(bs);
active = bdrv_skip_filters(active);
@@ -5403,6 +5562,8 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
/* Given a BDS, searches for the base layer. */
BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
+
return bdrv_find_overlay(bs, NULL);
}
@@ -5417,6 +5578,8 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
for (i = bs; i != base; i = child_bs(child)) {
child = bdrv_filter_or_cow_child(i);
@@ -5443,6 +5606,8 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
return -EPERM;
}
@@ -5477,6 +5642,8 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
BlockDriverState *i;
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+
for (i = bs; i != base; i = child_bs(child)) {
child = bdrv_filter_or_cow_child(i);
if (child) {
@@ -5526,6 +5693,8 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
g_autoptr(GSList) updated_children = NULL;
GSList *p;
+ GLOBAL_STATE_CODE();
+
bdrv_ref(top);
bdrv_subtree_drained_begin(top);
@@ -5637,6 +5806,8 @@ static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -5686,6 +5857,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp)
{
+ IO_CODE();
if (!drv->bdrv_measure) {
error_setg(errp, "Block driver '%s' does not support size measurement",
drv->format_name);
@@ -5701,6 +5873,7 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
int64_t bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv)
return -ENOMEDIUM;
@@ -5721,6 +5894,7 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs)
int64_t bdrv_getlength(BlockDriverState *bs)
{
int64_t ret = bdrv_nb_sectors(bs);
+ IO_CODE();
if (ret < 0) {
return ret;
@@ -5735,12 +5909,14 @@ int64_t bdrv_getlength(BlockDriverState *bs)
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
int64_t nb_sectors = bdrv_nb_sectors(bs);
+ IO_CODE();
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
bool bdrv_is_sg(BlockDriverState *bs)
{
+ IO_CODE();
return bs->sg;
}
@@ -5750,6 +5926,7 @@ bool bdrv_is_sg(BlockDriverState *bs)
bool bdrv_supports_compressed_writes(BlockDriverState *bs)
{
BlockDriverState *filtered;
+ IO_CODE();
if (!bs->drv || !block_driver_can_compress(bs->drv)) {
return false;
@@ -5769,6 +5946,7 @@ bool bdrv_supports_compressed_writes(BlockDriverState *bs)
const char *bdrv_get_format_name(BlockDriverState *bs)
{
+ IO_CODE();
return bs->drv ? bs->drv->format_name : NULL;
}
@@ -5785,6 +5963,8 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
int i;
const char **formats = NULL;
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(drv, &bdrv_drivers, list) {
if (drv->format_name) {
bool found = false;
@@ -5843,6 +6023,7 @@ BlockDriverState *bdrv_find_node(const char *node_name)
BlockDriverState *bs;
assert(node_name);
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
if (!strcmp(node_name, bs->node_name)) {
@@ -5859,6 +6040,8 @@ BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
BlockDeviceInfoList *list;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
list = NULL;
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
@@ -5934,6 +6117,7 @@ static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
{
BlockPermission qapi_perm;
XDbgBlockGraphEdge *edge;
+ GLOBAL_STATE_CODE();
edge = g_new0(XDbgBlockGraphEdge, 1);
@@ -5964,6 +6148,8 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
BdrvChild *child;
XDbgBlockGraphConstructor *gr = xdbg_graph_new();
+ GLOBAL_STATE_CODE();
+
for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
char *allocated_name = NULL;
const char *name = blk_name(blk);
@@ -6007,6 +6193,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
BlockBackend *blk;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
if (device) {
blk = blk_by_name(device);
@@ -6038,6 +6226,9 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
* return false. If either argument is NULL, return false. */
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
{
+
+ GLOBAL_STATE_CODE();
+
while (top && top != base) {
top = bdrv_filter_or_cow_bs(top);
}
@@ -6047,6 +6238,7 @@ bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
BlockDriverState *bdrv_next_node(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return QTAILQ_FIRST(&graph_bdrv_states);
}
@@ -6055,6 +6247,7 @@ BlockDriverState *bdrv_next_node(BlockDriverState *bs)
BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return QTAILQ_FIRST(&all_bdrv_states);
}
@@ -6063,6 +6256,7 @@ BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bs->node_name;
}
@@ -6070,6 +6264,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
{
BdrvChild *c;
const char *name;
+ IO_CODE();
/* If multiple parents have a name, just pick the first one. */
QLIST_FOREACH(c, &bs->parents, next_parent) {
@@ -6087,6 +6282,7 @@ const char *bdrv_get_parent_name(const BlockDriverState *bs)
/* TODO check what callers really want: bs->node_name or blk_name() */
const char *bdrv_get_device_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_get_parent_name(bs) ?: "";
}
@@ -6096,22 +6292,26 @@ const char *bdrv_get_device_name(const BlockDriverState *bs)
* absent, then this returns an empty (non-null) string. */
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_get_parent_name(bs) ?: bs->node_name;
}
int bdrv_get_flags(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bs->open_flags;
}
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return 1;
}
int bdrv_has_zero_init(BlockDriverState *bs)
{
BlockDriverState *filtered;
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
return 0;
@@ -6137,6 +6337,7 @@ int bdrv_has_zero_init(BlockDriverState *bs)
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
+ IO_CODE();
if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}
@@ -6147,6 +6348,7 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size)
{
+ IO_CODE();
pstrcpy(filename, filename_size, bs->backing_file);
}
@@ -6154,6 +6356,7 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
int ret;
BlockDriver *drv = bs->drv;
+ IO_CODE();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
return -ENOMEDIUM;
@@ -6182,6 +6385,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
Error **errp)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (drv && drv->bdrv_get_specific_info) {
return drv->bdrv_get_specific_info(bs, errp);
}
@@ -6191,6 +6395,7 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (!drv || !drv->bdrv_get_specific_stats) {
return NULL;
}
@@ -6199,6 +6404,7 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
+ IO_CODE();
if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
return;
}
@@ -6208,6 +6414,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
bs = bdrv_primary_bs(bs);
}
@@ -6223,6 +6430,7 @@ static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
const char *tag)
{
+ GLOBAL_STATE_CODE();
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
@@ -6233,6 +6441,7 @@ int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
bs = bdrv_find_debug_node(bs);
if (bs) {
return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
@@ -6243,6 +6452,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
bs = bdrv_primary_bs(bs);
}
@@ -6256,6 +6466,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
+ GLOBAL_STATE_CODE();
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
bs = bdrv_primary_bs(bs);
}
@@ -6283,6 +6494,8 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
BlockDriverState *retval = NULL;
BlockDriverState *bs_below;
+ GLOBAL_STATE_CODE();
+
if (!bs || !bs->drv || !backing_file) {
return NULL;
}
@@ -6393,19 +6606,21 @@ void bdrv_init_with_whitelist(void)
bdrv_init();
}
-int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+int bdrv_activate(BlockDriverState *bs, Error **errp)
{
BdrvChild *child, *parent;
Error *local_err = NULL;
int ret;
BdrvDirtyBitmap *bm;
+ GLOBAL_STATE_CODE();
+
if (!bs->drv) {
return -ENOMEDIUM;
}
QLIST_FOREACH(child, &bs->children, next) {
- bdrv_co_invalidate_cache(child->bs, &local_err);
+ bdrv_activate(child->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
@@ -6418,7 +6633,7 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
* Note that the required permissions of inactive images are always a
* subset of the permissions required after activating the image. This
* allows us to just get the permissions upfront without restricting
- * drv->bdrv_invalidate_cache().
+ * bdrv_co_invalidate_cache().
*
* It also means that in error cases, we don't have to try and revert to
* the old permissions (which is an operation that could fail, too). We can
@@ -6433,13 +6648,10 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
return ret;
}
- if (bs->drv->bdrv_co_invalidate_cache) {
- bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
- if (local_err) {
- bs->open_flags |= BDRV_O_INACTIVE;
- error_propagate(errp, local_err);
- return -EINVAL;
- }
+ ret = bdrv_invalidate_cache(bs, errp);
+ if (ret < 0) {
+ bs->open_flags |= BDRV_O_INACTIVE;
+ return ret;
}
FOR_EACH_DIRTY_BITMAP(bs, bm) {
@@ -6468,17 +6680,37 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
return 0;
}
-void bdrv_invalidate_cache_all(Error **errp)
+int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+ Error *local_err = NULL;
+ IO_CODE();
+
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
+
+ if (bs->drv->bdrv_co_invalidate_cache) {
+ bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+void bdrv_activate_all(Error **errp)
{
BlockDriverState *bs;
BdrvNextIterator it;
+ GLOBAL_STATE_CODE();
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret;
aio_context_acquire(aio_context);
- ret = bdrv_invalidate_cache(bs, errp);
+ ret = bdrv_activate(bs, errp);
aio_context_release(aio_context);
if (ret < 0) {
bdrv_next_cleanup(&it);
@@ -6490,6 +6722,7 @@ void bdrv_invalidate_cache_all(Error **errp)
static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
{
BdrvChild *parent;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(parent, &bs->parents, next_parent) {
if (parent->klass->parent_is_bds) {
@@ -6509,6 +6742,8 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
int ret;
uint64_t cumulative_perms, cumulative_shared_perms;
+ GLOBAL_STATE_CODE();
+
if (!bs->drv) {
return -ENOMEDIUM;
}
@@ -6572,6 +6807,8 @@ int bdrv_inactivate_all(void)
int ret = 0;
GSList *aio_ctxs = NULL, *ctx;
+ GLOBAL_STATE_CODE();
+
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
@@ -6615,6 +6852,7 @@ bool bdrv_is_inserted(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
BdrvChild *child;
+ IO_CODE();
if (!drv) {
return false;
@@ -6636,6 +6874,7 @@ bool bdrv_is_inserted(BlockDriverState *bs)
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
BlockDriver *drv = bs->drv;
+ IO_CODE();
if (drv && drv->bdrv_eject) {
drv->bdrv_eject(bs, eject_flag);
@@ -6649,7 +6888,7 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag)
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
BlockDriver *drv = bs->drv;
-
+ IO_CODE();
trace_bdrv_lock_medium(bs, locked);
if (drv && drv->bdrv_lock_medium) {
@@ -6660,6 +6899,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
bs->refcnt++;
}
@@ -6668,6 +6908,7 @@ void bdrv_ref(BlockDriverState *bs)
* deleted. */
void bdrv_unref(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs) {
return;
}
@@ -6685,6 +6926,7 @@ struct BdrvOpBlocker {
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
BdrvOpBlocker *blocker;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
blocker = QLIST_FIRST(&bs->op_blockers[op]);
@@ -6699,6 +6941,7 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
BdrvOpBlocker *blocker;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
blocker = g_new0(BdrvOpBlocker, 1);
@@ -6709,6 +6952,7 @@ void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
BdrvOpBlocker *blocker, *next;
+ GLOBAL_STATE_CODE();
assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
if (blocker->reason == reason) {
@@ -6721,6 +6965,7 @@ void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
int i;
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
bdrv_op_block(bs, i, reason);
}
@@ -6729,6 +6974,7 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
int i;
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
bdrv_op_unblock(bs, i, reason);
}
@@ -6737,7 +6983,7 @@ void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
int i;
-
+ GLOBAL_STATE_CODE();
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
if (!QLIST_EMPTY(&bs->op_blockers[i])) {
return false;
@@ -6759,6 +7005,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
Error *local_err = NULL;
int ret = 0;
+ GLOBAL_STATE_CODE();
+
/* Find driver and parse its options */
drv = bdrv_find_format(fmt);
if (!drv) {
@@ -6936,6 +7184,7 @@ out:
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
+ IO_CODE();
return bs ? bs->aio_context : qemu_get_aio_context();
}
@@ -6944,6 +7193,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
Coroutine *self = qemu_coroutine_self();
AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
AioContext *new_ctx;
+ IO_CODE();
/*
* Increase bs->in_flight to ensure that this operation is completed before
@@ -6958,6 +7208,7 @@ AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
{
+ IO_CODE();
aio_co_reschedule_self(old_ctx);
bdrv_dec_in_flight(bs);
}
@@ -6991,11 +7242,13 @@ void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
{
+ IO_CODE();
aio_co_enter(bdrv_get_aio_context(bs), co);
}
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
{
+ GLOBAL_STATE_CODE();
QLIST_REMOVE(ban, list);
g_free(ban);
}
@@ -7005,6 +7258,7 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
BdrvAioNotifier *baf, *baf_tmp;
assert(!bs->walking_aio_notifiers);
+ GLOBAL_STATE_CODE();
bs->walking_aio_notifiers = true;
QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
if (baf->deleted) {
@@ -7032,6 +7286,7 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
BdrvAioNotifier *ban, *ban_tmp;
+ GLOBAL_STATE_CODE();
if (bs->quiesce_counter) {
aio_disable_external(new_context);
@@ -7078,6 +7333,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
BdrvChild *child, *parent;
g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ GLOBAL_STATE_CODE();
if (old_context == new_context) {
return;
@@ -7150,6 +7406,7 @@ void bdrv_set_aio_context_ignore(BlockDriverState *bs,
static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (g_slist_find(*ignore, c)) {
return true;
}
@@ -7175,6 +7432,7 @@ static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (g_slist_find(*ignore, c)) {
return true;
}
@@ -7193,6 +7451,8 @@ bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
return true;
}
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
return false;
@@ -7213,6 +7473,8 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList *ignore;
bool ret;
+ GLOBAL_STATE_CODE();
+
ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
g_slist_free(ignore);
@@ -7231,6 +7493,7 @@ int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
}
@@ -7244,6 +7507,7 @@ void bdrv_add_aio_context_notifier(BlockDriverState *bs,
.detach_aio_context = detach_aio_context,
.opaque = opaque
};
+ GLOBAL_STATE_CODE();
QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
}
@@ -7255,6 +7519,7 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
void *opaque)
{
BdrvAioNotifier *ban, *ban_next;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
if (ban->attached_aio_context == attached_aio_context &&
@@ -7279,6 +7544,7 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
bool force,
Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!bs->drv) {
error_setg(errp, "Node is ejected");
return -ENOMEDIUM;
@@ -7309,6 +7575,8 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
{
BlockDriverState *filtered;
+ GLOBAL_STATE_CODE();
+
if (!bs || !bs->drv) {
return false;
}
@@ -7349,6 +7617,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
if (!to_replace_bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
return NULL;
@@ -7478,6 +7748,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
* would result in exactly bs->backing. */
static bool bdrv_backing_overridden(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (bs->backing) {
return strcmp(bs->auto_backing_file,
bs->backing->bs->filename);
@@ -7510,6 +7781,8 @@ void bdrv_refresh_filename(BlockDriverState *bs)
bool generate_json_filename; /* Whether our default implementation should
fill exact_filename (false) or not (true) */
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return;
}
@@ -7632,6 +7905,8 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, "Node '%s' is ejected", bs->node_name);
return NULL;
@@ -7663,7 +7938,7 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
Error **errp)
{
-
+ GLOBAL_STATE_CODE();
if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
error_setg(errp, "The node %s does not support adding a child",
bdrv_get_device_or_node_name(parent_bs));
@@ -7683,6 +7958,7 @@ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
{
BdrvChild *tmp;
+ GLOBAL_STATE_CODE();
if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
error_setg(errp, "The node %s does not support removing a child",
bdrv_get_device_or_node_name(parent_bs));
@@ -7710,6 +7986,7 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
BlockDriver *drv = c->bs->drv;
int ret;
+ GLOBAL_STATE_CODE();
assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
if (!drv->bdrv_make_empty) {
@@ -7734,6 +8011,8 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
*/
BdrvChild *bdrv_cow_child(BlockDriverState *bs)
{
+ IO_CODE();
+
if (!bs || !bs->drv) {
return NULL;
}
@@ -7757,6 +8036,7 @@ BdrvChild *bdrv_cow_child(BlockDriverState *bs)
BdrvChild *bdrv_filter_child(BlockDriverState *bs)
{
BdrvChild *c;
+ IO_CODE();
if (!bs || !bs->drv) {
return NULL;
@@ -7788,6 +8068,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
{
BdrvChild *cow_child = bdrv_cow_child(bs);
BdrvChild *filter_child = bdrv_filter_child(bs);
+ IO_CODE();
/* Filter nodes cannot have COW backing files */
assert(!(cow_child && filter_child));
@@ -7808,6 +8089,7 @@ BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
BdrvChild *bdrv_primary_child(BlockDriverState *bs)
{
BdrvChild *c, *found = NULL;
+ IO_CODE();
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & BDRV_CHILD_PRIMARY) {
@@ -7860,6 +8142,7 @@ static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
*/
BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bdrv_do_skip_filters(bs, true);
}
@@ -7869,6 +8152,7 @@ BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
*/
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_do_skip_filters(bs, false);
}
@@ -7878,6 +8162,7 @@ BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
*/
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
{
+ IO_CODE();
return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
}
@@ -7913,8 +8198,8 @@ static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
*/
bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
{
+ IO_CODE();
RCU_READ_LOCK_GUARD();
-
return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
}
@@ -7924,6 +8209,7 @@ bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
void bdrv_bsc_invalidate_range(BlockDriverState *bs,
int64_t offset, int64_t bytes)
{
+ IO_CODE();
RCU_READ_LOCK_GUARD();
if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
@@ -7938,6 +8224,7 @@ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
BdrvBlockStatusCache *old_bsc;
+ IO_CODE();
*new_bsc = (BdrvBlockStatusCache) {
.valid = true,
diff --git a/block/amend.c b/block/amend.c
index 392df9ef83..f696a006e3 100644
--- a/block/amend.c
+++ b/block/amend.c
@@ -53,10 +53,31 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
return ret;
}
+static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
+{
+ if (s->bs->drv->bdrv_amend_pre_run) {
+ return s->bs->drv->bdrv_amend_pre_run(s->bs, errp);
+ }
+
+ return 0;
+}
+
+static void blockdev_amend_free(Job *job)
+{
+ BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
+
+ if (s->bs->drv->bdrv_amend_clean) {
+ s->bs->drv->bdrv_amend_clean(s->bs);
+ }
+
+ bdrv_unref(s->bs);
+}
+
static const JobDriver blockdev_amend_job_driver = {
.instance_size = sizeof(BlockdevAmendJob),
.job_type = JOB_TYPE_AMEND,
.run = blockdev_amend_run,
+ .free = blockdev_amend_free,
};
void qmp_x_blockdev_amend(const char *job_id,
@@ -110,8 +131,15 @@ void qmp_x_blockdev_amend(const char *job_id,
return;
}
+ bdrv_ref(bs);
s->bs = bs,
s->opts = QAPI_CLONE(BlockdevAmendOptions, options),
s->force = has_force ? force : false;
+
+ if (blockdev_amend_pre_run(s, errp)) {
+ job_early_fail(&s->common);
+ return;
+ }
+
job_start(&s->common);
}
diff --git a/block/backup.c b/block/backup.c
index 21d5983779..5cfd0b999c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
assert(bs);
assert(target);
+ GLOBAL_STATE_CODE();
/* QMP interface protects us from these cases */
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
diff --git a/block/block-backend.c b/block/block-backend.c
index 4ff6b4d785..e0e1aff4b1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -79,6 +79,7 @@ struct BlockBackend {
bool allow_aio_context_change;
bool allow_write_beyond_eof;
+ /* Protected by BQL */
NotifierList remove_bs_notifiers, insert_bs_notifiers;
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
@@ -111,12 +112,14 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-/* All BlockBackends */
+/* All BlockBackends. Protected by BQL. */
static QTAILQ_HEAD(, BlockBackend) block_backends =
QTAILQ_HEAD_INITIALIZER(block_backends);
-/* All BlockBackends referenced by the monitor and which are iterated through by
- * blk_next() */
+/*
+ * All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next(). Protected by BQL.
+ */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
@@ -236,6 +239,7 @@ static void blk_root_activate(BdrvChild *child, Error **errp)
void blk_set_force_allow_inactivate(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->force_allow_inactivate = true;
}
@@ -354,6 +358,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
{
BlockBackend *blk;
+ GLOBAL_STATE_CODE();
+
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
blk->ctx = ctx;
@@ -391,6 +397,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
{
BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
+ GLOBAL_STATE_CODE();
+
if (blk_insert_bs(blk, bs, errp) < 0) {
blk_unref(blk);
return NULL;
@@ -419,6 +427,8 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
+ GLOBAL_STATE_CODE();
+
/*
* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a major concern because the BDS stays private
@@ -496,6 +506,7 @@ static void drive_info_del(DriveInfo *dinfo)
int blk_get_refcnt(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? blk->refcnt : 0;
}
@@ -506,6 +517,7 @@ int blk_get_refcnt(BlockBackend *blk)
void blk_ref(BlockBackend *blk)
{
assert(blk->refcnt > 0);
+ GLOBAL_STATE_CODE();
blk->refcnt++;
}
@@ -516,6 +528,7 @@ void blk_ref(BlockBackend *blk)
*/
void blk_unref(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk) {
assert(blk->refcnt > 0);
if (blk->refcnt > 1) {
@@ -536,6 +549,7 @@ void blk_unref(BlockBackend *blk)
*/
BlockBackend *blk_all_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, link)
: QTAILQ_FIRST(&block_backends);
}
@@ -544,6 +558,8 @@ void blk_remove_all_bs(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
while ((blk = blk_all_next(blk)) != NULL) {
AioContext *ctx = blk_get_aio_context(blk);
@@ -567,6 +583,7 @@ void blk_remove_all_bs(void)
*/
BlockBackend *blk_next(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk ? QTAILQ_NEXT(blk, monitor_link)
: QTAILQ_FIRST(&monitor_block_backends);
}
@@ -633,6 +650,7 @@ static void bdrv_next_reset(BdrvNextIterator *it)
BlockDriverState *bdrv_first(BdrvNextIterator *it)
{
+ GLOBAL_STATE_CODE();
bdrv_next_reset(it);
return bdrv_next(it);
}
@@ -670,6 +688,7 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
{
assert(!blk->name);
assert(name && name[0]);
+ GLOBAL_STATE_CODE();
if (!id_wellformed(name)) {
error_setg(errp, "Invalid device name");
@@ -697,6 +716,8 @@ bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
*/
void monitor_remove_blk(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
+
if (!blk->name) {
return;
}
@@ -712,6 +733,7 @@ void monitor_remove_blk(BlockBackend *blk)
*/
const char *blk_name(const BlockBackend *blk)
{
+ IO_CODE();
return blk->name ?: "";
}
@@ -723,6 +745,7 @@ BlockBackend *blk_by_name(const char *name)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
assert(name);
while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
@@ -737,12 +760,16 @@ BlockBackend *blk_by_name(const char *name)
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
+ IO_CODE();
return blk->root ? blk->root->bs : NULL;
}
static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
BdrvChild *child;
+
+ GLOBAL_STATE_CODE();
+
QLIST_FOREACH(child, &bs->parents, next_parent) {
if (child->klass == &child_root) {
return child->opaque;
@@ -757,6 +784,7 @@ static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
*/
bool bdrv_has_blk(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bdrv_first_blk(bs) != NULL;
}
@@ -767,6 +795,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
{
BdrvChild *c;
+ GLOBAL_STATE_CODE();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (c->klass != &child_root) {
return false;
@@ -781,6 +810,7 @@ bool bdrv_is_root_node(BlockDriverState *bs)
*/
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo;
}
@@ -792,6 +822,7 @@ DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
{
assert(!blk->legacy_dinfo);
+ GLOBAL_STATE_CODE();
return blk->legacy_dinfo = dinfo;
}
@@ -802,6 +833,7 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
@@ -816,6 +848,7 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
*/
BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->public;
}
@@ -824,6 +857,7 @@ BlockBackendPublic *blk_get_public(BlockBackend *blk)
*/
BlockBackend *blk_by_public(BlockBackendPublic *public)
{
+ GLOBAL_STATE_CODE();
return container_of(public, BlockBackend, public);
}
@@ -835,6 +869,8 @@ void blk_remove_bs(BlockBackend *blk)
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
BdrvChild *root;
+ GLOBAL_STATE_CODE();
+
notifier_list_notify(&blk->remove_bs_notifiers, blk);
if (tgm->throttle_state) {
BlockDriverState *bs = blk_bs(blk);
@@ -869,6 +905,7 @@ void blk_remove_bs(BlockBackend *blk)
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
+ GLOBAL_STATE_CODE();
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -892,6 +929,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
*/
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
{
+ GLOBAL_STATE_CODE();
return bdrv_replace_child_bs(blk->root, new_bs, errp);
}
@@ -902,6 +940,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp)
{
int ret;
+ GLOBAL_STATE_CODE();
if (blk->root && !blk->disable_perm) {
ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
@@ -918,6 +957,7 @@ int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
{
+ GLOBAL_STATE_CODE();
*perm = blk->perm;
*shared_perm = blk->shared_perm;
}
@@ -928,6 +968,7 @@ void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
*/
int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
{
+ GLOBAL_STATE_CODE();
if (blk->dev) {
return -EBUSY;
}
@@ -953,6 +994,7 @@ int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
{
assert(blk->dev == dev);
+ GLOBAL_STATE_CODE();
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
@@ -966,6 +1008,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
*/
DeviceState *blk_get_attached_dev(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->dev;
}
@@ -974,6 +1017,7 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk)
char *blk_get_attached_dev_id(BlockBackend *blk)
{
DeviceState *dev = blk->dev;
+ IO_CODE();
if (!dev) {
return g_strdup("");
@@ -994,6 +1038,8 @@ BlockBackend *blk_by_dev(void *dev)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
assert(dev != NULL);
while ((blk = blk_all_next(blk)) != NULL) {
if (blk->dev == dev) {
@@ -1011,6 +1057,7 @@ BlockBackend *blk_by_dev(void *dev)
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void *opaque)
{
+ GLOBAL_STATE_CODE();
blk->dev_ops = ops;
blk->dev_opaque = opaque;
@@ -1032,6 +1079,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
*/
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
bool tray_was_open, tray_is_open;
Error *local_err = NULL;
@@ -1064,6 +1112,7 @@ static void blk_root_change_media(BdrvChild *child, bool load)
*/
bool blk_dev_has_removable_media(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
}
@@ -1072,6 +1121,7 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
*/
bool blk_dev_has_tray(BlockBackend *blk)
{
+ IO_CODE();
return blk->dev_ops && blk->dev_ops->is_tray_open;
}
@@ -1081,6 +1131,7 @@ bool blk_dev_has_tray(BlockBackend *blk)
*/
void blk_dev_eject_request(BlockBackend *blk, bool force)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
}
@@ -1091,6 +1142,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
+ IO_CODE();
if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
@@ -1103,6 +1155,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk)
*/
bool blk_dev_is_medium_locked(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
return blk->dev_ops->is_medium_locked(blk->dev_opaque);
}
@@ -1123,6 +1176,7 @@ static void blk_root_resize(BdrvChild *child)
void blk_iostatus_enable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = true;
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1131,6 +1185,7 @@ void blk_iostatus_enable(BlockBackend *blk)
* enables it _and_ the VM is configured to stop on errors */
bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
+ IO_CODE();
return (blk->iostatus_enabled &&
(blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
@@ -1139,16 +1194,19 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk)
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->iostatus;
}
void blk_iostatus_disable(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
blk->iostatus_enabled = false;
}
void blk_iostatus_reset(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
if (blk_iostatus_is_enabled(blk)) {
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
@@ -1156,6 +1214,7 @@ void blk_iostatus_reset(BlockBackend *blk)
void blk_iostatus_set_err(BlockBackend *blk, int error)
{
+ IO_CODE();
assert(blk_iostatus_is_enabled(blk));
if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
@@ -1165,16 +1224,19 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_write_beyond_eof = allow;
}
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
{
+ IO_CODE();
blk->allow_aio_context_change = allow;
}
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
+ IO_CODE();
blk->disable_request_queuing = disable;
}
@@ -1228,6 +1290,7 @@ blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1258,6 +1321,7 @@ int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags);
@@ -1274,6 +1338,7 @@ blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
{
int ret;
BlockDriverState *bs;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1309,6 +1374,7 @@ int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
BdrvRequestFlags flags)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
@@ -1321,6 +1387,7 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
}
@@ -1349,22 +1416,26 @@ typedef struct BlkRwCo {
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_pwritev_part(blk, offset, bytes, NULL, 0,
flags | BDRV_REQ_ZERO_WRITE);
}
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
+ GLOBAL_STATE_CODE();
return bdrv_make_zero(blk->root, flags);
}
void blk_inc_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_inc(&blk->in_flight);
}
void blk_dec_in_flight(BlockBackend *blk)
{
+ IO_CODE();
qatomic_dec(&blk->in_flight);
aio_wait_kick();
}
@@ -1383,6 +1454,7 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
void *opaque, int ret)
{
struct BlockBackendAIOCB *acb;
+ IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
@@ -1490,6 +1562,7 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
@@ -1498,6 +1571,7 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_preadv(blk, offset, bytes, &qiov, 0);
@@ -1511,6 +1585,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags);
@@ -1519,6 +1594,7 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
int64_t blk_getlength(BlockBackend *blk)
{
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1528,6 +1604,7 @@ int64_t blk_getlength(BlockBackend *blk)
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
+ IO_CODE();
if (!blk_bs(blk)) {
*nb_sectors_ptr = 0;
} else {
@@ -1537,6 +1614,7 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
int64_t blk_nb_sectors(BlockBackend *blk)
{
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -1548,6 +1626,7 @@ BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_read_entry, flags, cb, opaque);
@@ -1557,6 +1636,7 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
assert((uint64_t)qiov->size <= INT64_MAX);
return blk_aio_prwv(blk, offset, qiov->size, qiov,
blk_aio_write_entry, flags, cb, opaque);
@@ -1564,11 +1644,13 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
void blk_aio_cancel(BlockAIOCB *acb)
{
+ GLOBAL_STATE_CODE();
bdrv_aio_cancel(acb);
}
void blk_aio_cancel_async(BlockAIOCB *acb)
{
+ IO_CODE();
bdrv_aio_cancel_async(acb);
}
@@ -1576,6 +1658,8 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
int coroutine_fn
blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ IO_CODE();
+
blk_wait_while_drained(blk);
if (!blk_is_available(blk)) {
@@ -1588,6 +1672,7 @@ blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_ioctl(blk, req, buf);
@@ -1609,6 +1694,7 @@ static void blk_aio_ioctl_entry(void *opaque)
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
@@ -1617,6 +1703,7 @@ int coroutine_fn
blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int ret;
+ IO_CODE();
blk_wait_while_drained(blk);
@@ -1641,6 +1728,7 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
cb, opaque);
}
@@ -1649,6 +1737,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_pdiscard(blk, offset, bytes);
@@ -1660,6 +1749,7 @@ int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_do_pdiscard(blk, offset, bytes);
@@ -1672,6 +1762,7 @@ int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
int coroutine_fn blk_co_do_flush(BlockBackend *blk)
{
blk_wait_while_drained(blk);
+ IO_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
@@ -1692,12 +1783,14 @@ static void blk_aio_flush_entry(void *opaque)
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
int coroutine_fn blk_co_flush(BlockBackend *blk)
{
int ret;
+ IO_OR_GS_CODE();
blk_inc_in_flight(blk);
ret = blk_co_do_flush(blk);
@@ -1720,6 +1813,7 @@ int blk_flush(BlockBackend *blk)
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_ref(bs);
@@ -1740,6 +1834,8 @@ void blk_drain_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
+
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
@@ -1759,12 +1855,14 @@ void blk_drain_all(void)
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
+ GLOBAL_STATE_CODE();
blk->on_read_error = on_read_error;
blk->on_write_error = on_write_error;
}
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
+ IO_CODE();
return is_read ? blk->on_read_error : blk->on_write_error;
}
@@ -1772,6 +1870,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+ IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -1811,6 +1910,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
assert(error >= 0);
+ IO_CODE();
if (action == BLOCK_ERROR_ACTION_STOP) {
/* First set the iostatus, so that "info block" returns an iostatus
@@ -1842,6 +1942,7 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool blk_supports_write_perm(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return !bdrv_is_read_only(bs);
@@ -1856,12 +1957,14 @@ bool blk_supports_write_perm(BlockBackend *blk)
*/
bool blk_is_writable(BlockBackend *blk)
{
+ IO_CODE();
return blk->perm & BLK_PERM_WRITE;
}
bool blk_is_sg(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
return false;
@@ -1872,41 +1975,47 @@ bool blk_is_sg(BlockBackend *blk)
bool blk_enable_write_cache(BlockBackend *blk)
{
+ IO_CODE();
return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
+ GLOBAL_STATE_CODE();
blk->enable_write_cache = wce;
}
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
+void blk_activate(BlockBackend *blk, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
- bdrv_invalidate_cache(bs, errp);
+ bdrv_activate(bs, errp);
}
bool blk_is_inserted(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
return bs && bdrv_is_inserted(bs);
}
bool blk_is_available(BlockBackend *blk)
{
+ IO_CODE();
return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_lock_medium(bs, locked);
@@ -1917,6 +2026,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
{
BlockDriverState *bs = blk_bs(blk);
char *id;
+ IO_CODE();
if (bs) {
bdrv_eject(bs, eject_flag);
@@ -1933,6 +2043,7 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
int blk_get_flags(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
return bdrv_get_flags(bs);
@@ -1945,6 +2056,7 @@ int blk_get_flags(BlockBackend *blk)
uint32_t blk_get_request_alignment(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
}
@@ -1953,6 +2065,7 @@ uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint64_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
@@ -1966,6 +2079,7 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
uint32_t max = INT_MAX;
+ IO_CODE();
if (bs) {
max = MIN_NON_ZERO(max, bs->bl.max_transfer);
@@ -1975,33 +2089,39 @@ uint32_t blk_get_max_transfer(BlockBackend *blk)
int blk_get_max_hw_iov(BlockBackend *blk)
{
+ IO_CODE();
return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
blk->root->bs->bl.max_iov);
}
int blk_get_max_iov(BlockBackend *blk)
{
+ IO_CODE();
return blk->root->bs->bl.max_iov;
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
+ IO_CODE();
blk->guest_block_size = align;
}
void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
+ IO_CODE();
return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (!bs) {
return false;
@@ -2013,6 +2133,7 @@ bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock(bs, op, reason);
@@ -2022,6 +2143,7 @@ void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_block_all(bs, reason);
@@ -2031,6 +2153,7 @@ void blk_op_block_all(BlockBackend *blk, Error *reason)
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_op_unblock_all(bs, reason);
@@ -2040,6 +2163,7 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
AioContext *blk_get_aio_context(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
@@ -2090,6 +2214,7 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp)
{
+ GLOBAL_STATE_CODE();
return blk_do_set_aio_context(blk, new_context, true, errp);
}
@@ -2126,6 +2251,7 @@ void blk_add_aio_context_notifier(BlockBackend *blk,
{
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
notifier = g_new(BlockBackendAioNotifier, 1);
notifier->attached_aio_context = attached_aio_context;
@@ -2148,6 +2274,8 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ GLOBAL_STATE_CODE();
+
if (bs) {
bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
@@ -2168,17 +2296,20 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->remove_bs_notifiers, notify);
}
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
+ GLOBAL_STATE_CODE();
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void blk_io_plug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_io_plug(bs);
@@ -2188,6 +2319,7 @@ void blk_io_plug(BlockBackend *blk)
void blk_io_unplug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ IO_CODE();
if (bs) {
bdrv_io_unplug(bs);
@@ -2196,18 +2328,21 @@ void blk_io_unplug(BlockBackend *blk)
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
+ IO_CODE();
return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ IO_CODE();
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_OR_GS_CODE();
return blk_co_pwritev(blk, offset, bytes, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
@@ -2216,6 +2351,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int64_t bytes)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
return blk_pwritev_part(blk, offset, bytes, &qiov, 0,
BDRV_REQ_WRITE_COMPRESSED);
}
@@ -2223,6 +2359,7 @@ int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
+ IO_OR_GS_CODE();
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
@@ -2235,6 +2372,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
int ret;
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
@@ -2254,6 +2392,7 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2263,6 +2402,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2272,6 +2412,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -2285,6 +2426,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
*/
void blk_update_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
assert(blk->root);
blk->root_state.open_flags = blk->root->bs->open_flags;
@@ -2297,6 +2439,7 @@ void blk_update_root_state(BlockBackend *blk)
*/
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.detect_zeroes;
}
@@ -2306,17 +2449,20 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
*/
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root_state.open_flags;
}
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return &blk->root_state;
}
int blk_commit_all(void)
{
BlockBackend *blk = NULL;
+ GLOBAL_STATE_CODE();
while ((blk = blk_all_next(blk)) != NULL) {
AioContext *aio_context = blk_get_aio_context(blk);
@@ -2341,6 +2487,7 @@ int blk_commit_all(void)
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
+ GLOBAL_STATE_CODE();
throttle_group_config(&blk->public.throttle_group_member, cfg);
}
@@ -2349,6 +2496,7 @@ void blk_io_limits_disable(BlockBackend *blk)
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
assert(tgm->throttle_state);
+ GLOBAL_STATE_CODE();
if (bs) {
bdrv_ref(bs);
bdrv_drained_begin(bs);
@@ -2364,12 +2512,14 @@ void blk_io_limits_disable(BlockBackend *blk)
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
assert(!blk->public.throttle_group_member.throttle_state);
+ GLOBAL_STATE_CODE();
throttle_group_register_tgm(&blk->public.throttle_group_member,
group, blk_get_aio_context(blk));
}
void blk_io_limits_update_group(BlockBackend *blk, const char *group)
{
+ GLOBAL_STATE_CODE();
/* this BB is not part of any group */
if (!blk->public.throttle_group_member.throttle_state) {
return;
@@ -2437,11 +2587,13 @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
void blk_register_buf(BlockBackend *blk, void *host, size_t size)
{
+ GLOBAL_STATE_CODE();
bdrv_register_buf(blk_bs(blk), host, size);
}
void blk_unregister_buf(BlockBackend *blk, void *host)
{
+ GLOBAL_STATE_CODE();
bdrv_unregister_buf(blk_bs(blk), host);
}
@@ -2451,6 +2603,8 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BdrvRequestFlags write_flags)
{
int r;
+ IO_CODE();
+
r = blk_check_byte_request(blk_in, off_in, bytes);
if (r) {
return r;
@@ -2466,11 +2620,13 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
const BdrvChild *blk_root(BlockBackend *blk)
{
+ GLOBAL_STATE_CODE();
return blk->root;
}
int blk_make_empty(BlockBackend *blk, Error **errp)
{
+ GLOBAL_STATE_CODE();
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
diff --git a/block/commit.c b/block/commit.c
index b1fc7b908b..c76899f640 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -253,6 +253,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
uint64_t base_perms, iter_shared_perms;
int ret;
+ GLOBAL_STATE_CODE();
+
assert(top != bs);
if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
error_setg(errp, "Invalid files for merge: top and base are the same");
@@ -432,6 +434,8 @@ int bdrv_commit(BlockDriverState *bs)
QEMU_AUTO_VFREE uint8_t *buf = NULL;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
if (!drv)
return -ENOMEDIUM;
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index c30a5ff8de..80b7684dba 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -223,6 +223,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
QDict *opts;
assert(source->total_sectors == target->total_sectors);
+ GLOBAL_STATE_CODE();
opts = qdict_new();
qdict_put_str(opts, "driver", "copy-before-write");
@@ -245,6 +246,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
void bdrv_cbw_drop(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
bdrv_drop_filter(bs, &error_abort);
bdrv_unref(bs);
}
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 51847e711a..6e72bb25e9 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -29,6 +29,13 @@
#include "block/block_int.h"
#include "block/block-copy.h"
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,
diff --git a/block/coroutines.h b/block/coroutines.h
index c8c14a29c8..b293e943c8 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -30,17 +30,17 @@
/* For blk_bs() in generated block/block-gen.c */
#include "sysemu/block-backend.h"
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
-int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@@ -52,6 +52,51 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
+
+int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
+ QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn
+nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
+
+
+int coroutine_fn
+blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+
+int coroutine_fn
+blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+int coroutine_fn
+blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
+
+int coroutine_fn
+blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
+
+int coroutine_fn blk_co_do_flush(BlockBackend *blk);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int generated_co_wrapper
+bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
+int generated_co_wrapper
+bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags);
+
int generated_co_wrapper
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@@ -63,46 +108,24 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
-
-int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos);
-int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
- QEMUIOVector *qiov, int64_t pos);
-
int generated_co_wrapper
nbd_do_establish_connection(BlockDriverState *bs, Error **errp);
-int coroutine_fn
-nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
-
int generated_co_wrapper
blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, BdrvRequestFlags flags);
-
int generated_co_wrapper
blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
-int coroutine_fn
-blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
int generated_co_wrapper
blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-int coroutine_fn
-blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int generated_co_wrapper
blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int coroutine_fn
-blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int generated_co_wrapper blk_do_flush(BlockBackend *blk);
-int coroutine_fn blk_co_do_flush(BlockBackend *blk);
#endif /* BLOCK_COROUTINES_INT_H */
diff --git a/block/create.c b/block/create.c
index 89812669df..4df43f11f4 100644
--- a/block/create.c
+++ b/block/create.c
@@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
int ret;
+ GLOBAL_STATE_CODE();
+
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
job_progress_update(&s->common, 1);
diff --git a/block/crypto.c b/block/crypto.c
index c8ba4681e2..9d5fecbef8 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -778,36 +778,54 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
}
static int
+block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int ret;
+
+ /* apply for exclusive read/write permissions to the underlying file */
+ crypto->updating_keys = true;
+ ret = bdrv_child_refresh_perms(bs, bs->file, errp);
+ if (ret < 0) {
+ /* Well, in this case we will not be updating any keys */
+ crypto->updating_keys = false;
+ }
+ return ret;
+}
+
+static void
+block_crypto_amend_cleanup(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ Error *errp = NULL;
+
+ /* release exclusive read/write permissions to the underlying file */
+ crypto->updating_keys = false;
+ bdrv_child_refresh_perms(bs, bs->file, &errp);
+
+ if (errp) {
+ error_report_err(errp);
+ }
+}
+
+static int
block_crypto_amend_options_generic_luks(BlockDriverState *bs,
QCryptoBlockAmendOptions *amend_options,
bool force,
Error **errp)
{
BlockCrypto *crypto = bs->opaque;
- int ret;
assert(crypto);
assert(crypto->block);
- /* apply for exclusive read/write permissions to the underlying file*/
- crypto->updating_keys = true;
- ret = bdrv_child_refresh_perms(bs, bs->file, errp);
- if (ret) {
- goto cleanup;
- }
-
- ret = qcrypto_block_amend_options(crypto->block,
- block_crypto_read_func,
- block_crypto_write_func,
- bs,
- amend_options,
- force,
- errp);
-cleanup:
- /* release exclusive read/write permissions to the underlying file*/
- crypto->updating_keys = false;
- bdrv_child_refresh_perms(bs, bs->file, errp);
- return ret;
+ return qcrypto_block_amend_options(crypto->block,
+ block_crypto_read_func,
+ block_crypto_write_func,
+ bs,
+ amend_options,
+ force,
+ errp);
}
static int
@@ -833,8 +851,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
if (!amend_options) {
goto cleanup;
}
+
+ ret = block_crypto_amend_prepare(bs, errp);
+ if (ret) {
+ goto perm_cleanup;
+ }
ret = block_crypto_amend_options_generic_luks(bs, amend_options,
force, errp);
+
+perm_cleanup:
+ block_crypto_amend_cleanup(bs);
cleanup:
qapi_free_QCryptoBlockAmendOptions(amend_options);
return ret;
@@ -931,6 +957,8 @@ static BlockDriver bdrv_crypto_luks = {
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
.bdrv_amend_options = block_crypto_amend_options_luks,
.bdrv_co_amend = block_crypto_co_amend_luks,
+ .bdrv_amend_pre_run = block_crypto_amend_prepare,
+ .bdrv_amend_clean = block_crypto_amend_cleanup,
.is_format = true,
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 0ef46163e3..0334b85805 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -496,6 +496,7 @@ static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque)
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp)
{
+ IO_CODE();
if (qemu_in_coroutine()) {
return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
} else {
@@ -656,6 +657,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
+ IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(bitmap));
bdrv_dirty_bitmaps_lock(bitmap->bs);
if (!out) {
@@ -673,6 +675,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
{
HBitmap *tmp = bitmap->bitmap;
assert(!bdrv_dirty_bitmap_readonly(bitmap));
+ GLOBAL_STATE_CODE();
bitmap->bitmap = backup;
hbitmap_free(tmp);
}
@@ -737,6 +740,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvDirtyBitmap *bitmap;
+ IO_CODE();
if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
return;
@@ -928,6 +932,7 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
bool lock)
{
bool ret;
+ IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(dest));
assert(!bdrv_dirty_bitmap_inconsistent(dest));
diff --git a/block/export/export.c b/block/export/export.c
index 6d3b9964c8..7253af3bc3 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -139,7 +139,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
* access since the export could be available before migration handover.
* ctx was acquired in the caller.
*/
- bdrv_invalidate_cache(bs, NULL);
+ bdrv_activate(bs, NULL);
perm = BLK_PERM_CONSISTENT_READ;
if (export->writable) {
diff --git a/block/export/fuse.c b/block/export/fuse.c
index fdda8e3c81..5029e70f84 100644
--- a/block/export/fuse.c
+++ b/block/export/fuse.c
@@ -86,8 +86,8 @@ static int fuse_export_create(BlockExport *blk_exp,
assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
- /* For growable exports, take the RESIZE permission */
- if (args->growable) {
+ /* For growable or writable exports, take the RESIZE permission */
+ if (args->growable || blk_exp_args->writable) {
uint64_t blk_perm, blk_shared_perm;
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
@@ -392,14 +392,23 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
{
uint64_t blk_perm, blk_shared_perm;
BdrvRequestFlags truncate_flags = 0;
- int ret;
+ bool add_resize_perm;
+ int ret, ret_check;
+
+ /* Growable or writable exports have a permanent RESIZE permission */
+ add_resize_perm = !exp->growable && !exp->writable;
if (req_zero_write) {
truncate_flags |= BDRV_REQ_ZERO_WRITE;
}
- /* Growable exports have a permanent RESIZE permission */
- if (!exp->growable) {
+ if (add_resize_perm) {
+
+ if (!qemu_in_main_thread()) {
+ /* Changing permissions like below only works in the main thread */
+ return -EPERM;
+ }
+
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
@@ -412,9 +421,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
ret = blk_truncate(exp->common.blk, size, true, prealloc,
truncate_flags, NULL);
- if (!exp->growable) {
+ if (add_resize_perm) {
/* Must succeed, because we are only giving up the RESIZE permission */
- blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+ ret_check = blk_set_perm(exp->common.blk, blk_perm,
+ blk_shared_perm, &error_abort);
+ assert(ret_check == 0);
}
return ret;
diff --git a/block/io.c b/block/io.c
index 4e4cb556c5..efc011ce65 100644
--- a/block/io.c
+++ b/block/io.c
@@ -70,6 +70,7 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
void bdrv_parent_drained_end_single(BdrvChild *c)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
}
@@ -114,6 +115,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
+ IO_OR_GS_CODE();
c->parent_quiesce_counter++;
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
@@ -164,6 +166,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
BdrvChild *c;
bool have_limits;
+ GLOBAL_STATE_CODE();
+
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
*s = (BdrvRefreshLimitsState) {
@@ -189,10 +193,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
- bdrv_refresh_limits(c->bs, tran, errp);
- if (*errp) {
- return;
- }
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
@@ -226,12 +226,14 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_inc(&bs->copy_on_read);
}
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
int old = qatomic_fetch_dec(&bs->copy_on_read);
+ IO_CODE();
assert(old >= 1);
}
@@ -303,6 +305,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
{
BdrvChild *child, *next;
+ IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
@@ -426,6 +429,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents)
{
+ IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
/* Stop things in parent-to-child order */
@@ -477,11 +481,13 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
void bdrv_drained_begin(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, false, NULL, false, true);
}
void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, true, NULL, false, true);
}
@@ -538,18 +544,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
void bdrv_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
+ IO_CODE();
bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ IO_OR_GS_CODE();
bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
@@ -557,6 +566,7 @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
int i;
+ IO_OR_GS_CODE();
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_begin(child->bs, true, child, false, true);
@@ -567,6 +577,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
int drained_end_counter = 0;
int i;
+ IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_end(child->bs, true, child, false,
@@ -585,6 +596,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
*/
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
assert(qemu_in_coroutine());
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
@@ -592,6 +604,7 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
void bdrv_drain(BlockDriverState *bs)
{
+ IO_OR_GS_CODE();
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
}
@@ -612,6 +625,7 @@ static bool bdrv_drain_all_poll(void)
{
BlockDriverState *bs = NULL;
bool result = false;
+ GLOBAL_STATE_CODE();
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
@@ -640,6 +654,7 @@ static bool bdrv_drain_all_poll(void)
void bdrv_drain_all_begin(void)
{
BlockDriverState *bs = NULL;
+ GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
@@ -682,6 +697,7 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
int drained_end_counter = 0;
+ GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
@@ -696,6 +712,7 @@ void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
int drained_end_counter = 0;
+ GLOBAL_STATE_CODE();
/*
* bdrv queue is managed by record/replay,
@@ -723,6 +740,7 @@ void bdrv_drain_all_end(void)
void bdrv_drain_all(void)
{
+ GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
bdrv_drain_all_end();
}
@@ -867,6 +885,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
BdrvTrackedRequest *req;
Coroutine *self = qemu_coroutine_self();
+ IO_CODE();
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req->co == self) {
@@ -886,7 +905,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t *cluster_bytes)
{
BlockDriverInfo bdi;
-
+ IO_CODE();
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
*cluster_offset = offset;
*cluster_bytes = bytes;
@@ -912,16 +931,19 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
void bdrv_inc_in_flight(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_inc(&bs->in_flight);
}
void bdrv_wakeup(BlockDriverState *bs)
{
+ IO_CODE();
aio_wait_kick();
}
void bdrv_dec_in_flight(BlockDriverState *bs)
{
+ IO_CODE();
qatomic_dec(&bs->in_flight);
bdrv_wakeup(bs);
}
@@ -946,6 +968,7 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
bool waited;
+ IO_CODE();
qemu_co_mutex_lock(&req->bs->reqs_lock);
@@ -1040,6 +1063,7 @@ static int bdrv_check_request32(int64_t offset, int64_t bytes,
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@@ -1058,6 +1082,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
int ret;
int64_t target_size, bytes, offset = 0;
BlockDriverState *bs = child->bs;
+ IO_CODE();
target_size = bdrv_getlength(bs);
if (target_size < 0) {
@@ -1090,6 +1115,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
if (bytes < 0) {
return -EINVAL;
@@ -1111,6 +1137,7 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
if (bytes < 0) {
return -EINVAL;
@@ -1131,6 +1158,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int64_t count)
{
int ret;
+ IO_CODE();
ret = bdrv_pwrite(child, offset, buf, count);
if (ret < 0) {
@@ -1797,6 +1825,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}
@@ -1809,6 +1838,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
BdrvTrackedRequest req;
BdrvRequestPadding pad;
int ret;
+ IO_CODE();
trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
@@ -2230,6 +2260,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ IO_CODE();
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
@@ -2243,6 +2274,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
BdrvRequestPadding pad;
int ret;
bool padded = false;
+ IO_CODE();
trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
@@ -2326,6 +2358,7 @@ out:
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
+ IO_CODE();
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
@@ -2345,6 +2378,8 @@ int bdrv_flush_all(void)
BlockDriverState *bs = NULL;
int result = 0;
+ GLOBAL_STATE_CODE();
+
/*
* bdrv queue is managed by record/replay,
* creating new flush request for stopping
@@ -2639,6 +2674,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *p;
int64_t eof = 0;
int dummy;
+ IO_CODE();
assert(!include_base || base); /* Can't include NULL base */
@@ -2728,6 +2764,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
+ IO_CODE();
return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
pnum, map, file, NULL);
}
@@ -2735,6 +2772,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
+ IO_CODE();
return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
offset, bytes, pnum, map, file);
}
@@ -2751,6 +2789,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t pnum = bytes;
+ IO_CODE();
if (!bytes) {
return 1;
@@ -2771,6 +2810,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t dummy;
+ IO_CODE();
ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
bytes, pnum ? pnum : &dummy, NULL,
@@ -2807,6 +2847,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
int ret = bdrv_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
+ IO_CODE();
if (ret < 0) {
return ret;
}
@@ -2823,6 +2864,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
+ IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@@ -2854,6 +2896,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
+ IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@@ -2884,6 +2927,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_writev_vmstate(bs, &qiov, pos);
+ IO_CODE();
return ret < 0 ? ret : size;
}
@@ -2893,6 +2937,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_readv_vmstate(bs, &qiov, pos);
+ IO_CODE();
return ret < 0 ? ret : size;
}
@@ -2902,6 +2947,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
void bdrv_aio_cancel(BlockAIOCB *acb)
{
+ IO_CODE();
qemu_aio_ref(acb);
bdrv_aio_cancel_async(acb);
while (acb->refcnt > 1) {
@@ -2926,6 +2972,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
* In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
+ IO_CODE();
if (acb->aiocb_info->cancel_async) {
acb->aiocb_info->cancel_async(acb);
}
@@ -2940,6 +2987,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
BdrvChild *child;
int current_gen;
int ret = 0;
+ IO_CODE();
bdrv_inc_in_flight(bs);
@@ -3065,6 +3113,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t max_pdiscard;
int head, tail, align;
BlockDriverState *bs = child->bs;
+ IO_CODE();
if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
return -ENOMEDIUM;
@@ -3183,6 +3232,7 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
.coroutine = qemu_coroutine_self(),
};
BlockAIOCB *acb;
+ IO_CODE();
bdrv_inc_in_flight(bs);
if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@@ -3207,17 +3257,20 @@ out:
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
+ IO_CODE();
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
+ IO_CODE();
return memset(qemu_blockalign(bs, size), 0, size);
}
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
size_t align = bdrv_opt_mem_align(bs);
+ IO_CODE();
/* Ensure that NULL is never returned on success */
assert(align > 0);
@@ -3231,6 +3284,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
void *mem = qemu_try_blockalign(bs, size);
+ IO_CODE();
if (mem) {
memset(mem, 0, size);
@@ -3246,6 +3300,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
size_t alignment = bdrv_min_mem_align(bs);
+ IO_CODE();
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
@@ -3262,6 +3317,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
void bdrv_io_plug(BlockDriverState *bs)
{
BdrvChild *child;
+ IO_CODE();
QLIST_FOREACH(child, &bs->children, next) {
bdrv_io_plug(child->bs);
@@ -3278,6 +3334,7 @@ void bdrv_io_plug(BlockDriverState *bs)
void bdrv_io_unplug(BlockDriverState *bs)
{
BdrvChild *child;
+ IO_CODE();
assert(bs->io_plugged);
if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
@@ -3296,6 +3353,7 @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_register_buf) {
bs->drv->bdrv_register_buf(bs, host, size);
}
@@ -3308,6 +3366,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_unregister_buf) {
bs->drv->bdrv_unregister_buf(bs, host);
}
@@ -3402,6 +3461,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3418,6 +3478,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@@ -3429,6 +3490,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
+ IO_CODE();
return bdrv_co_copy_range_from(src, src_offset,
dst, dst_offset,
bytes, read_flags, write_flags);
@@ -3461,7 +3523,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
BdrvTrackedRequest req;
int64_t old_size, new_bytes;
int ret;
-
+ IO_CODE();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
@@ -3579,6 +3641,7 @@ out:
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
if (!bs || !bs->drv) {
return;
}
diff --git a/block/meson.build b/block/meson.build
index 8a1ce58c9c..e42bcb58d5 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -131,8 +131,11 @@ block_ss.add(module_block_h)
wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
block_gen_c = custom_target('block-gen.c',
output: 'block-gen.c',
- input: files('../include/block/block.h',
- 'coroutines.h'),
+ input: files(
+ '../include/block/block-io.h',
+ '../include/block/block-global-state.h',
+ 'coroutines.h'
+ ),
command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
block_ss.add(block_gen_c)
diff --git a/block/mirror.c b/block/mirror.c
index 69b2c1c697..ce6bc58d1f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1864,6 +1864,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
bool is_none_mode;
BlockDriverState *base;
+ GLOBAL_STATE_CODE();
+
if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
(mode == MIRROR_SYNC_MODE_BITMAP)) {
error_setg(errp, "Sync mode '%s' not supported",
@@ -1889,6 +1891,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
bool base_read_only;
BlockJob *job;
+ GLOBAL_STATE_CODE();
+
base_read_only = bdrv_is_read_only(base);
if (base_read_only) {
diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index 9f11deec64..972e8a0afc 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -56,6 +56,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
+ GLOBAL_STATE_CODE();
+
if (!node) {
error_setg(errp, "Node cannot be NULL");
return NULL;
@@ -155,6 +157,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
BdrvDirtyBitmap *bitmap;
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return NULL;
@@ -261,6 +265,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
BlockDirtyBitmapMergeSourceList *lst;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
if (!dst) {
return NULL;
diff --git a/block/nbd.c b/block/nbd.c
index 5853d85d60..146d25660e 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -313,6 +313,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
int ret;
bool blocking = nbd_client_connecting_wait(s);
+ IO_CODE();
assert(!s->ioc);
diff --git a/block/parallels.c b/block/parallels.c
index 6ebad2a2bb..e58c828422 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -873,7 +873,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_dirty_bmap =
bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
- /* Disable migration until bdrv_invalidate_cache method is added */
+ /* Disable migration until bdrv_activate method is added */
error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));
diff --git a/block/snapshot.c b/block/snapshot.c
index ccacda8bd5..d6f53c3065 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i, ret;
+ GLOBAL_STATE_CODE();
+
ret = -ENOENT;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
bool ret = false;
assert(id || name);
+ GLOBAL_STATE_CODE();
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ GLOBAL_STATE_CODE();
if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
@@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+ GLOBAL_STATE_CODE();
+
if (!drv) {
return -ENOMEDIUM;
}
@@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
BdrvChild **fallback_ptr;
int ret, open_ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, "Block driver is closed");
return -ENOMEDIUM;
@@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
int ret;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
+
+ GLOBAL_STATE_CODE();
if (!drv) {
return -ENOMEDIUM;
}
@@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
+ GLOBAL_STATE_CODE();
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
int ret;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
@@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return false;
}
@@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -622,6 +645,8 @@ int bdrv_all_has_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@@ -663,6 +688,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
{
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
@@ -703,6 +729,8 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ GLOBAL_STATE_CODE();
+
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return NULL;
}
diff --git a/block/stream.c b/block/stream.c
index 7c6b173ddd..3acb59fe6a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -220,6 +220,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
QDict *opts;
int ret;
+ GLOBAL_STATE_CODE();
+
assert(!(base && bottom));
assert(!(backing_file_str && bottom));
diff --git a/blockdev.c b/blockdev.c
index 42e098b458..e46e831212 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -63,11 +63,13 @@
#include "qemu/main-loop.h"
#include "qemu/throttle-options.h"
+/* Protected by BQL */
QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
void bdrv_set_monitor_owned(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
}
@@ -111,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
if (max_devs <= 0) {
return;
}
@@ -140,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk)
DriveInfo *dinfo = blk_legacy_dinfo(blk);
BlockJob *job;
+ GLOBAL_STATE_CODE();
+
if (!dinfo) {
return;
}
@@ -161,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
void blockdev_auto_del(BlockBackend *blk)
{
DriveInfo *dinfo = blk_legacy_dinfo(blk);
+ GLOBAL_STATE_CODE();
if (dinfo && dinfo->auto_del) {
monitor_remove_blk(blk);
@@ -185,6 +192,8 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
{
QemuOpts *opts;
+ GLOBAL_STATE_CODE();
+
opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
if (!opts) {
return NULL;
@@ -205,6 +214,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
if (dinfo && dinfo->type == type
@@ -227,6 +238,8 @@ void drive_check_orphaned(void)
Location loc;
bool orphans = false;
+ GLOBAL_STATE_CODE();
+
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/*
@@ -260,6 +273,7 @@ void drive_check_orphaned(void)
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
{
+ GLOBAL_STATE_CODE();
return drive_get(type,
drive_index_to_bus_id(type, index),
drive_index_to_unit_id(type, index));
@@ -271,6 +285,8 @@ int drive_get_max_bus(BlockInterfaceType type)
BlockBackend *blk;
DriveInfo *dinfo;
+ GLOBAL_STATE_CODE();
+
max_bus = -1;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
@@ -628,6 +644,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
{
int bdrv_flags = 0;
+ GLOBAL_STATE_CODE();
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
@@ -646,6 +663,7 @@ void blockdev_close_all_bdrv_states(void)
{
BlockDriverState *bs, *next_bs;
+ GLOBAL_STATE_CODE();
QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
AioContext *ctx = bdrv_get_aio_context(bs);
@@ -658,6 +676,7 @@ void blockdev_close_all_bdrv_states(void)
/* Iterates over the list of monitor-owned BlockDriverStates */
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
{
+ GLOBAL_STATE_CODE();
return bs ? QTAILQ_NEXT(bs, monitor_list)
: QTAILQ_FIRST(&monitor_bdrv_states);
}
@@ -754,6 +773,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
const char *filename;
int i;
+ GLOBAL_STATE_CODE();
+
/* Change legacy command line options into QMP ones */
static const struct {
const char *from;
@@ -1174,6 +1195,8 @@ typedef struct BlkActionState BlkActionState;
*
* Only prepare() may fail. In a single transaction, only one of commit() or
* abort() will be called. clean() will always be called if it is present.
+ *
+ * Always run under BQL.
*/
typedef struct BlkActionOps {
size_t instance_size;
@@ -2283,6 +2306,8 @@ static TransactionProperties *get_transaction_properties(
/*
* 'Atomic' group operations. The operations are performed as a set, and if
* any fail then we roll back all operations in the group.
+ *
+ * Always run under BQL.
*/
void qmp_transaction(TransactionActionList *dev_list,
bool has_props,
@@ -2294,6 +2319,8 @@ void qmp_transaction(TransactionActionList *dev_list,
BlkActionState *state, *next;
Error *local_err = NULL;
+ GLOBAL_STATE_CODE();
+
QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
QTAILQ_INIT(&snap_bdrv_states);
@@ -3596,6 +3623,8 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
AioContext *aio_context;
BlockDriverState *bs;
+ GLOBAL_STATE_CODE();
+
bs = bdrv_find_node(node_name);
if (!bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
diff --git a/blockjob.c b/blockjob.c
index 10815a89fe..4868453d74 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -62,6 +62,7 @@ static bool is_block_job(Job *job)
BlockJob *block_job_next(BlockJob *bjob)
{
Job *job = bjob ? &bjob->job : NULL;
+ GLOBAL_STATE_CODE();
do {
job = job_next(job);
@@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob)
BlockJob *block_job_get(const char *id)
{
Job *job = job_get(id);
+ GLOBAL_STATE_CODE();
if (job && is_block_job(job)) {
return container_of(job, BlockJob, job);
@@ -84,6 +86,7 @@ BlockJob *block_job_get(const char *id)
void block_job_free(Job *job)
{
BlockJob *bjob = container_of(job, BlockJob, job);
+ GLOBAL_STATE_CODE();
block_job_remove_all_bdrv(bjob);
ratelimit_destroy(&bjob->limit);
@@ -183,6 +186,7 @@ static const BdrvChildClass child_job = {
void block_job_remove_all_bdrv(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
/*
* bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
* which will also traverse job->nodes, so consume the list one by
@@ -205,6 +209,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
{
GSList *el;
+ GLOBAL_STATE_CODE();
for (el = job->nodes; el; el = el->next) {
BdrvChild *c = el->data;
@@ -221,6 +226,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
bool need_context_ops;
+ GLOBAL_STATE_CODE();
bdrv_ref(bs);
@@ -270,6 +276,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
const BlockJobDriver *drv = block_job_driver(job);
int64_t old_speed = job->speed;
+ GLOBAL_STATE_CODE();
+
if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) {
return false;
}
@@ -299,6 +307,7 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
+ IO_CODE();
return ratelimit_calculate_delay(&job->limit, n);
}
@@ -307,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
BlockJobInfo *info;
uint64_t progress_current, progress_total;
+ GLOBAL_STATE_CODE();
+
if (block_job_is_internal(job)) {
error_setg(errp, "Cannot query QEMU internal jobs");
return NULL;
@@ -434,6 +445,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
{
BlockJob *job;
int ret;
+ GLOBAL_STATE_CODE();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
job_id = bdrv_get_device_name(bs);
@@ -488,6 +500,7 @@ fail:
void block_job_iostatus_reset(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
return;
}
@@ -498,6 +511,7 @@ void block_job_iostatus_reset(BlockJob *job)
void block_job_user_resume(Job *job)
{
BlockJob *bjob = container_of(job, BlockJob, job);
+ GLOBAL_STATE_CODE();
block_job_iostatus_reset(bjob);
}
@@ -505,6 +519,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
int is_read, int error)
{
BlockErrorAction action;
+ IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@@ -543,5 +558,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
AioContext *block_job_get_aio_context(BlockJob *job)
{
+ GLOBAL_STATE_CODE();
return job->job.aio_context;
}
diff --git a/docs/tools/qemu-storage-daemon.rst b/docs/tools/qemu-storage-daemon.rst
index 878e6a5c5c..8b97592663 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -154,6 +154,13 @@ Standard options:
created but before accepting connections. The daemon has started successfully
when the pid file is written and clients may begin connecting.
+.. option:: --daemonize
+
+ Daemonize the process. The parent process will exit once startup is complete
+ (i.e., after the pid file has been or would have been written) or failure
+ occurs. Its exit code reflects whether the child has started up successfully
+ or failed to do so.
+
Examples
--------
Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute
diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 81f9f971d8..74c7190302 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
PFlashCFI01 *pfl = opaque;
- /* This is called after bdrv_invalidate_cache_all. */
+ /* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(pfl->vmstate);
pfl->vmstate = NULL;
diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c
index fbfdf47e26..18b43be7f6 100644
--- a/hw/nvram/spapr_nvram.c
+++ b/hw/nvram/spapr_nvram.c
@@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
SpaprNvram *nvram = opaque;
- /* This is called after bdrv_invalidate_cache_all. */
+ /* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(nvram->vmstate);
nvram->vmstate = NULL;
diff --git a/include/block/block-common.h b/include/block/block-common.h
new file mode 100644
index 0000000000..0c5dc4a86a
--- /dev/null
+++ b/include/block/block-common.h
@@ -0,0 +1,418 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_COMMON_H
+#define BLOCK_COMMON_H
+
+#include "block/aio.h"
+#include "block/aio-wait.h"
+#include "qemu/iov.h"
+#include "qemu/coroutine.h"
+#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
+#include "block/blockjob.h"
+#include "qemu/hbitmap.h"
+#include "qemu/transactions.h"
+
+/*
+ * generated_co_wrapper
+ *
+ * Function specifier, which does nothing but mark functions to be
+ * generated by scripts/block-coroutine-wrapper.py
+ *
+ * Read more in docs/devel/block-coroutine-wrapper.rst
+ */
+#define generated_co_wrapper
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BdrvChild BdrvChild;
+typedef struct BdrvChildClass BdrvChildClass;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+ /*
+ * True if this block driver only supports compressed writes
+ */
+ bool needs_compressed_writes;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+
+ /*
+ * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+ * that the block driver should unmap (discard) blocks if it is guaranteed
+ * that the result will read back as zeroes. The flag is only passed to the
+ * driver if the block device is opened with BDRV_O_UNMAP.
+ */
+ BDRV_REQ_MAY_UNMAP = 0x4,
+
+ BDRV_REQ_FUA = 0x10,
+ BDRV_REQ_WRITE_COMPRESSED = 0x20,
+
+ /*
+ * Signifies that this write request will not change the visible disk
+ * content.
+ */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
+ /*
+ * Forces request serialisation. Use only with write requests.
+ */
+ BDRV_REQ_SERIALISING = 0x80,
+
+ /*
+ * Execute the request only if the operation can be offloaded or otherwise
+ * be executed efficiently, but return an error instead of using a slow
+ * fallback.
+ */
+ BDRV_REQ_NO_FALLBACK = 0x100,
+
+ /*
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
+ */
+ BDRV_REQ_PREFETCH = 0x200,
+
+ /*
+ * If we need to wait for other requests, just fail immediately. Used
+ * only together with BDRV_REQ_SERIALISING.
+ */
+ BDRV_REQ_NO_WAIT = 0x400,
+
+ /* Mask of valid flags */
+ BDRV_REQ_MASK = 0x7ff,
+} BdrvRequestFlags;
+
+#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save
+ writes in a snapshot */
+#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the
+ thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
+ select an appropriate protocol driver,
+ ignoring the format layer */
+#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
+#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
+ read-write fails */
+#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+/* Option names of options parsed by the block layer */
+
+#define BDRV_OPT_CACHE_WB "cache.writeback"
+#define BDRV_OPT_CACHE_DIRECT "cache.direct"
+#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
+#define BDRV_OPT_READ_ONLY "read-only"
+#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
+#define BDRV_OPT_DISCARD "discard"
+#define BDRV_OPT_FORCE_SHARE "force-share"
+
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+
+#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
+ INT_MAX >> BDRV_SECTOR_BITS)
+#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
+
+/*
+ * We want to allow aligning requests and disk length up to any 32bit alignment
+ * without being afraid of overflow.
+ * To achieve this, and at the same time use a reasonably round number as the
+ * maximum disk size, define the maximum "length" (a limit for any offset/bytes
+ * request and for disk size) as INT64_MAX aligned down to BDRV_MAX_ALIGNMENT.
+ */
+#define BDRV_MAX_ALIGNMENT (1L << 30)
+#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
+
+/*
+ * Allocation status flags for bdrv_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
+ * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
+ *
+ * Internal flags:
+ * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
+ * that the block layer recompute the answer from the returned
+ * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ * zeroes in file child of current block node inside
+ * returned region. Only valid together with both
+ * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ * appear with BDRV_BLOCK_ZERO.
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
+ *
+ * DATA ZERO OFFSET_VALID
+ * t t t sectors read as zero, returned file is zero at offset
+ * t f t sectors read as valid from file at offset
+ * f t t sectors preallocated, read as zero, returned file not
+ * necessarily zero at offset
+ * f f t sectors preallocated but read from backing_hd,
+ * returned file contains garbage at offset
+ * t t f sectors preallocated, read as zero, unknown offset
+ * t f f sectors read from unknown file or offset
+ * f t f not allocated or unknown offset, read as zero
+ * f f f not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA 0x01
+#define BDRV_BLOCK_ZERO 0x02
+#define BDRV_BLOCK_OFFSET_VALID 0x04
+#define BDRV_BLOCK_RAW 0x08
+#define BDRV_BLOCK_ALLOCATED 0x10
+#define BDRV_BLOCK_EOF 0x20
+#define BDRV_BLOCK_RECURSE 0x40
+
+typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+ bool backing_missing;
+ BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
+ BlockDriverState *old_file_bs; /* keep pointer for permissions update */
+ QDict *options;
+ QDict *explicit_options;
+ void *opaque;
+} BDRVReopenState;
+
+/*
+ * Block operation types
+ */
+typedef enum BlockOpType {
+ BLOCK_OP_TYPE_BACKUP_SOURCE,
+ BLOCK_OP_TYPE_BACKUP_TARGET,
+ BLOCK_OP_TYPE_CHANGE,
+ BLOCK_OP_TYPE_COMMIT_SOURCE,
+ BLOCK_OP_TYPE_COMMIT_TARGET,
+ BLOCK_OP_TYPE_DATAPLANE,
+ BLOCK_OP_TYPE_DRIVE_DEL,
+ BLOCK_OP_TYPE_EJECT,
+ BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
+ BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
+ BLOCK_OP_TYPE_MIRROR_SOURCE,
+ BLOCK_OP_TYPE_MIRROR_TARGET,
+ BLOCK_OP_TYPE_RESIZE,
+ BLOCK_OP_TYPE_STREAM,
+ BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_MAX,
+} BlockOpType;
+
+/* Block node permission constants */
+enum {
+ /**
+ * A user that has the "permission" of consistent reads is guaranteed that
+ * their view of the contents of the block device is complete and
+ * self-consistent, representing the contents of a disk at a specific
+ * point.
+ *
+ * For most block devices (including their backing files) this is true, but
+ * the property cannot be maintained in a few situations like for
+ * intermediate nodes of a commit block job.
+ */
+ BLK_PERM_CONSISTENT_READ = 0x01,
+
+ /** This permission is required to change the visible disk contents. */
+ BLK_PERM_WRITE = 0x02,
+
+ /**
+ * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
+ * required for writes to the block node when the caller promises that
+ * the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
+ */
+ BLK_PERM_WRITE_UNCHANGED = 0x04,
+
+ /** This permission is required to change the size of a block node. */
+ BLK_PERM_RESIZE = 0x08,
+
+ /**
+ * There used to be a bit BLK_PERM_GRAPH_MOD, with a value of 0x10, which has
+ * since been removed. QEMU 6.1 and earlier may still lock the corresponding
+ * byte in block/file-posix locking, so any new permission must be implemented
+ * carefully to avoid interfering with this old, unused bit.
+ */
+
+ BLK_PERM_ALL = 0x0f,
+
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_WRITE_UNCHANGED
+ | BLK_PERM_RESIZE,
+
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
+};
+
+/*
+ * Flags that parent nodes assign to child nodes to specify what kind of
+ * role(s) they take.
+ *
+ * At least one of DATA, METADATA, FILTERED, or COW must be set for
+ * every child.
+ */
+enum BdrvChildRoleBits {
+ /*
+ * This child stores data.
+ * Any node may have an arbitrary number of such children.
+ */
+ BDRV_CHILD_DATA = (1 << 0),
+
+ /*
+ * This child stores metadata.
+ * Any node may have an arbitrary number of metadata-storing
+ * children.
+ */
+ BDRV_CHILD_METADATA = (1 << 1),
+
+ /*
+ * A child that always presents exactly the same visible data as
+ * the parent, e.g. by virtue of the parent forwarding all reads
+ * and writes.
+ * This flag is mutually exclusive with DATA, METADATA, and COW.
+ * Any node may have at most one filtered child at a time.
+ */
+ BDRV_CHILD_FILTERED = (1 << 2),
+
+ /*
+ * Child from which to read all data that isn't allocated in the
+ * parent (i.e., the backing child); such data is copied to the
+ * parent through COW (and optionally COR).
+ * This flag is mutually exclusive with DATA, METADATA, and
+ * FILTERED.
+ * Any node may have at most one such backing child at a time.
+ */
+ BDRV_CHILD_COW = (1 << 3),
+
+ /*
+ * The primary child. For most drivers, this is the child whose
+ * filename applies best to the parent node.
+ * Any node may have at most one primary child at a time.
+ */
+ BDRV_CHILD_PRIMARY = (1 << 4),
+
+ /* Useful combination of flags */
+ BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
+ | BDRV_CHILD_METADATA
+ | BDRV_CHILD_PRIMARY,
+};
+
+/* Mask of BdrvChildRoleBits values */
+typedef unsigned int BdrvChildRole;
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+typedef struct BlockSizes {
+ uint32_t phys;
+ uint32_t log;
+} BlockSizes;
+
+typedef struct HDGeometry {
+ uint32_t heads;
+ uint32_t sectors;
+ uint32_t cylinders;
+} HDGeometry;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * These functions must never call functions from any other category
+ * (I/O, "I/O or GS", Global State), only functions from this category;
+ * they can, however, be invoked from all categories.
+ */
+
+char *bdrv_perm_names(uint64_t perm);
+uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
+
+void bdrv_init_with_whitelist(void);
+bool bdrv_uses_whitelist(void);
+int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
+
+int bdrv_parse_aio(const char *mode, int *flags);
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+
+int path_has_protocol(const char *path);
+int path_is_absolute(const char *path);
+char *path_combine(const char *base_path, const char *filename);
+
+char *bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ Error **errp);
+
+#endif /* BLOCK_COMMON_H */
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
new file mode 100644
index 0000000000..25bb69bbef
--- /dev/null
+++ b/include/block/block-global-state.h
@@ -0,0 +1,253 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_GLOBAL_STATE_H
+#define BLOCK_GLOBAL_STATE_H
+
+#include "block-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * If a function modifies the graph, it also uses drain and/or
+ * aio_context_acquire/release to be sure it has unique access.
+ * aio_context locking is needed together with BQL because of
+ * the thread-safe I/O API that concurrently runs and accesses
+ * the graph without the BQL.
+ *
+ * It is important to note that not all of these functions are
+ * necessarily limited to running under the BQL, but they would
+ * require additional auditing and many small thread-safety changes
+ * to move them into the I/O API. Often it's not worth doing that
+ * work since the APIs are only used with the BQL held at the
+ * moment, so they have been placed in the GS API (for now).
+ *
+ * These functions can call any function from this and other categories
+ * (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs.
+ *
+ * All functions in this header must use the macro
+ * GLOBAL_STATE_CODE();
+ * to catch when they are accidentally called without the BQL.
+ */
+
+void bdrv_init(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix,
+ Error **errp);
+BlockDriver *bdrv_find_format(const char *format_name);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QemuOpts *opts, Error **errp);
+int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
+
+BlockDriverState *bdrv_new(void);
+int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
+ Error **errp);
+int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
+ Error **errp);
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+ Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+ int flags, Error **errp);
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
+
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState *parent,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ bool allow_none, Error **errp);
+BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+ const char *bdref_key, Error **errp);
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+ const char *node_name,
+ QDict *options, int flags,
+ Error **errp);
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+ int flags, Error **errp);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, QDict *options,
+ bool keep_old_opts);
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+ Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+void bdrv_refresh_filename(BlockDriverState *bs);
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_make_empty(BdrvChild *c, Error **errp);
+int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
+ const char *backing_fmt, bool warn);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+ const char *backing_file_str);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
+
+/*
+ * The units of offset and total_work_size may be chosen arbitrarily by the
+ * block driver; total_work_size may change during the course of the amendment
+ * operation
+ */
+typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
+ int64_t total_work_size, void *opaque);
+int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ bool force,
+ Error **errp);
+
+/* check if a named node can be replaced when doing drive-mirror */
+BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+ const char *node_name, Error **errp);
+
+int bdrv_activate(BlockDriverState *bs, Error **errp);
+void bdrv_activate_all(Error **errp);
+int bdrv_inactivate_all(void);
+
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all_begin(void);
+void bdrv_drain_all_end(void);
+void bdrv_drain_all(void);
+
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+BlockDriverState *bdrv_find_node(const char *node_name);
+BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
+XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
+BlockDriverState *bdrv_lookup_bs(const char *device,
+ const char *node_name,
+ Error **errp);
+bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
+BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
+
+typedef struct BdrvNextIterator {
+ enum {
+ BDRV_NEXT_BACKEND_ROOTS,
+ BDRV_NEXT_MONITOR_OWNED,
+ } phase;
+ BlockBackend *blk;
+ BlockDriverState *bs;
+} BdrvNextIterator;
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it);
+BlockDriverState *bdrv_next(BdrvNextIterator *it);
+void bdrv_next_cleanup(BdrvNextIterator *it);
+
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque, bool read_only);
+int bdrv_get_flags(BlockDriverState *bs);
+char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+char *bdrv_dirname(BlockDriverState *bs, Error **errp);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ bool quiet, Error **errp);
+
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ Error **errp);
+
+bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
+void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
+void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
+void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
+bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+/**
+ * Locks the AioContext of @bs if it's not the current AioContext. This avoids
+ * double locking which could lead to deadlocks: This is a coroutine_fn, so we
+ * know we already own the lock of the current AioContext.
+ *
+ * May only be called in the main thread.
+ */
+void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
+
+/**
+ * Unlocks the AioContext of @bs if it's not the current AioContext.
+ */
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
+
+void bdrv_set_aio_context_ignore(BlockDriverState *bs,
+ AioContext *new_context, GSList **ignore);
+int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ Error **errp);
+int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
+ GSList **ignore, Error **errp);
+bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GSList **ignore, Error **errp);
+AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
+
+int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
+
+void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
+
+/**
+ *
+ * bdrv_register_buf/bdrv_unregister_buf:
+ *
+ * Register/unregister a buffer for I/O. For example, VFIO drivers are
+ * interested in knowing which memory areas will later be used for I/O, so
+ * that they can prepare IOMMU mappings etc. to get better performance.
+ */
+void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+void bdrv_cancel_in_flight(BlockDriverState *bs);
+
+#endif /* BLOCK_GLOBAL_STATE_H */
diff --git a/include/block/block-io.h b/include/block/block-io.h
new file mode 100644
index 0000000000..5e3f346806
--- /dev/null
+++ b/include/block/block-io.h
@@ -0,0 +1,368 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_IO_H
+#define BLOCK_IO_H
+
+#include "block-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe, and therefore
+ * can run in any thread as long as the thread has called
+ * aio_context_acquire/release().
+ *
+ * These functions can only call functions from I/O and Common categories,
+ * but can be invoked by GS, "I/O or GS" and I/O APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
+ int64_t bytes);
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int64_t bytes);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags,
+ Error **errp);
+
+int64_t bdrv_nb_sectors(BlockDriverState *bs);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
+ BlockDriverState *in_bs, Error **errp);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
+void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
+
+
+/* async block I/O */
+void bdrv_aio_cancel(BlockAIOCB *acb);
+void bdrv_aio_cancel_async(BlockAIOCB *acb);
+
+/* sg packet commands */
+int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
+
+/* Ensure contents are flushed to disk. */
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
+int bdrv_block_status(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum, int64_t *map,
+ BlockDriverState **file);
+int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
+ int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ bool include_base, int64_t offset, int64_t bytes,
+ int64_t *pnum);
+int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
+ int64_t bytes);
+
+int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
+ bool ignore_allow_rdw, Error **errp);
+int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+ Error **errp);
+bool bdrv_is_read_only(BlockDriverState *bs);
+bool bdrv_is_writable(BlockDriverState *bs);
+bool bdrv_is_sg(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+
+bool bdrv_supports_compressed_writes(BlockDriverState *bs);
+const char *bdrv_get_node_name(const BlockDriverState *bs);
+const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
+ Error **errp);
+BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ int64_t *cluster_offset,
+ int64_t *cluster_bytes);
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+/*
+ * Returns the alignment in bytes that is required so that no bounce buffer
+ * is required throughout the stack
+ */
+size_t bdrv_min_mem_align(BlockDriverState *bs);
+/* Returns optimal alignment in bytes for bounce buffer */
+size_t bdrv_opt_mem_align(BlockDriverState *bs);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_blockalign0(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
+
+#define BLKDBG_EVENT(child, evt) \
+ do { \
+ if (child) { /* a NULL child is skipped; no event is emitted */ \
+ bdrv_debug_event((child)->bs, evt); \
+ } \
+ } while (0)
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+/**
+ * Move the current coroutine to the AioContext of @bs and return the old
+ * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
+ * will wait for the operation to proceed until the corresponding
+ * bdrv_co_leave().
+ *
+ * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
+ * this will deadlock.
+ */
+AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
+
+/**
+ * Ends a section started by bdrv_co_enter(). Move the current coroutine back
+ * to old_ctx and decrease bs->in_flight again.
+ */
+void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
+
+/**
+ * Transfer control to @co in the aio context of @bs
+ */
+void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
+
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
+
+void bdrv_io_plug(BlockDriverState *bs);
+void bdrv_io_unplug(BlockDriverState *bs);
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @read_flags, @write_flags: request flags. Supported flags:
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ * write on @dst as if bdrv_co_pwrite_zeroes is
+ * called. Used to simplify caller code, or
+ * during BlockDriver.bdrv_co_copy_range_from()
+ * recursion.
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ * requests currently in flight.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+/**
+ * bdrv_drained_end_no_poll:
+ *
+ * Same as bdrv_drained_end(), but do not poll for the subgraph to
+ * actually become unquiesced. Therefore, no graph changes will occur
+ * with this function.
+ *
+ * *drained_end_counter is incremented for every background operation
+ * that is scheduled, and will be decremented for every operation once
+ * it settles. The caller must poll until it reaches 0. The counter
+ * should be accessed using atomic operations only.
+ */
+void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which
+ * requires the caller to be either in the main thread and hold
+ * the BlockDriverState (bs) AioContext lock, or directly in the
+ * home thread that runs the bs AioContext. Calling them from
+ * another thread in another AioContext would cause deadlocks.
+ *
+ * Therefore, these functions are not proper I/O, because they
+ * can't run in *any* iothreads, but only in a specific one.
+ *
+ * These functions can call any function from the I/O and Common categories
+ * and this one, but must be invoked only by other "I/O or GS" and GS APIs.
+ *
+ * All functions in this category must use the macro
+ * IO_OR_GS_CODE();
+ * to catch when they are accidentally called by the wrong API.
+ */
+
+#define BDRV_POLL_WHILE(bs, cond) ({ \
+ BlockDriverState *bs_ = (bs); \
+ IO_OR_GS_CODE(); \
+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
+ cond); })
+
+void bdrv_drain(BlockDriverState *bs);
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
+
+int generated_co_wrapper
+bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+/* Invalidate any cached metadata used by image formats */
+int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
+ Error **errp);
+int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
+int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
+ int64_t bytes);
+int generated_co_wrapper
+bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int generated_co_wrapper
+bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+ * any pending activity to cease.
+ */
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+
+/**
+ * bdrv_parent_drained_end_single:
+ *
+ * End a quiesced section for the parent of @c.
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled, which may result in graph changes.
+ */
+void bdrv_parent_drained_end_single(BdrvChild *c);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+ BdrvChild *ignore_parent, bool ignore_bds_parents);
+
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server, block jobs, and device model.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ BdrvChild *parent, bool ignore_bds_parents);
+
+/**
+ * Like bdrv_drained_begin, but recursively begins a quiesced section for
+ * exclusive access to all child nodes as well.
+ */
+void bdrv_subtree_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ *
+ * This polls @bs's AioContext until all scheduled sub-drained_ends
+ * have settled. On one hand, that may result in graph changes. On
+ * the other, this requires that the caller either runs in the main
+ * loop; or that all involved nodes (@bs and all of its parents) are
+ * in the caller's AioContext.
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
+/**
+ * End a quiescent section started by bdrv_subtree_drained_begin().
+ */
+void bdrv_subtree_drained_end(BlockDriverState *bs);
+
+#endif /* BLOCK_IO_H */
diff --git a/include/block/block.h b/include/block/block.h
index e1713ee306..1e6b8fef1e 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -1,864 +1,32 @@
-#ifndef BLOCK_H
-#define BLOCK_H
-
-#include "block/aio.h"
-#include "block/aio-wait.h"
-#include "qemu/iov.h"
-#include "qemu/coroutine.h"
-#include "block/accounting.h"
-#include "block/dirty-bitmap.h"
-#include "block/blockjob.h"
-#include "qemu/hbitmap.h"
-#include "qemu/transactions.h"
-
/*
- * generated_co_wrapper
- *
- * Function specifier, which does nothing but mark functions to be
- * generated by scripts/block-coroutine-wrapper.py
- *
- * Read more in docs/devel/block-coroutine-wrapper.rst
- */
-#define generated_co_wrapper
-
-/* block.c */
-typedef struct BlockDriver BlockDriver;
-typedef struct BdrvChild BdrvChild;
-typedef struct BdrvChildClass BdrvChildClass;
-
-typedef struct BlockDriverInfo {
- /* in bytes, 0 if irrelevant */
- int cluster_size;
- /* offset at which the VM state can be saved (0 if not possible) */
- int64_t vm_state_offset;
- bool is_dirty;
- /*
- * True if this block driver only supports compressed writes
- */
- bool needs_compressed_writes;
-} BlockDriverInfo;
-
-typedef struct BlockFragInfo {
- uint64_t allocated_clusters;
- uint64_t total_clusters;
- uint64_t fragmented_clusters;
- uint64_t compressed_clusters;
-} BlockFragInfo;
-
-typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
-
- /*
- * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
- * that the block driver should unmap (discard) blocks if it is guaranteed
- * that the result will read back as zeroes. The flag is only passed to the
- * driver if the block device is opened with BDRV_O_UNMAP.
- */
- BDRV_REQ_MAY_UNMAP = 0x4,
-
- BDRV_REQ_FUA = 0x10,
- BDRV_REQ_WRITE_COMPRESSED = 0x20,
-
- /* Signifies that this write request will not change the visible disk
- * content. */
- BDRV_REQ_WRITE_UNCHANGED = 0x40,
-
- /* Forces request serialisation. Use only with write requests. */
- BDRV_REQ_SERIALISING = 0x80,
-
- /* Execute the request only if the operation can be offloaded or otherwise
- * be executed efficiently, but return an error instead of using a slow
- * fallback. */
- BDRV_REQ_NO_FALLBACK = 0x100,
-
- /*
- * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
- * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
- * filter is involved), in which case it signals that the COR operation
- * need not read the data into memory (qiov) but only ensure they are
- * copied to the top layer (i.e., that COR operation is done).
- */
- BDRV_REQ_PREFETCH = 0x200,
-
- /*
- * If we need to wait for other requests, just fail immediately. Used
- * only together with BDRV_REQ_SERIALISING.
- */
- BDRV_REQ_NO_WAIT = 0x400,
-
- /* Mask of valid flags */
- BDRV_REQ_MASK = 0x7ff,
-} BdrvRequestFlags;
-
-typedef struct BlockSizes {
- uint32_t phys;
- uint32_t log;
-} BlockSizes;
-
-typedef struct HDGeometry {
- uint32_t heads;
- uint32_t sectors;
- uint32_t cylinders;
-} HDGeometry;
-
-#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
-#define BDRV_O_RDWR 0x0002
-#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
-#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
-#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
-#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
-#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
-#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
-#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
-#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
-#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
-#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
-#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
- select an appropriate protocol driver,
- ignoring the format layer */
-#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
-#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
-#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
-
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-
-
-/* Option names of options parsed by the block layer */
-
-#define BDRV_OPT_CACHE_WB "cache.writeback"
-#define BDRV_OPT_CACHE_DIRECT "cache.direct"
-#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
-#define BDRV_OPT_READ_ONLY "read-only"
-#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
-#define BDRV_OPT_DISCARD "discard"
-#define BDRV_OPT_FORCE_SHARE "force-share"
-
-
-#define BDRV_SECTOR_BITS 9
-#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
-
-#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
- INT_MAX >> BDRV_SECTOR_BITS)
-#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
-
-/*
- * We want allow aligning requests and disk length up to any 32bit alignment
- * and don't afraid of overflow.
- * To achieve it, and in the same time use some pretty number as maximum disk
- * size, let's define maximum "length" (a limit for any offset/bytes request and
- * for disk size) to be the greatest power of 2 less than INT64_MAX.
- */
-#define BDRV_MAX_ALIGNMENT (1L << 30)
-#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
-
-/*
- * Allocation status flags for bdrv_block_status() and friends.
- *
- * Public flags:
- * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
- * BDRV_BLOCK_ZERO: offset reads as zero
- * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
- * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer rather than any backing, set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
- * layer, set by block layer
- *
- * Internal flags:
- * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
- * that the block layer recompute the answer from the returned
- * BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
- * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
- * zeroes in file child of current block node inside
- * returned region. Only valid together with both
- * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
- * appear with BDRV_BLOCK_ZERO.
- *
- * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
- * host offset within the returned BDS that is allocated for the
- * corresponding raw guest data. However, whether that offset
- * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
- *
- * DATA ZERO OFFSET_VALID
- * t t t sectors read as zero, returned file is zero at offset
- * t f t sectors read as valid from file at offset
- * f t t sectors preallocated, read as zero, returned file not
- * necessarily zero at offset
- * f f t sectors preallocated but read from backing_hd,
- * returned file contains garbage at offset
- * t t f sectors preallocated, read as zero, unknown offset
- * t f f sectors read from unknown file or offset
- * f t f not allocated or unknown offset, read as zero
- * f f f not allocated or unknown offset, read from backing_hd
- */
-#define BDRV_BLOCK_DATA 0x01
-#define BDRV_BLOCK_ZERO 0x02
-#define BDRV_BLOCK_OFFSET_VALID 0x04
-#define BDRV_BLOCK_RAW 0x08
-#define BDRV_BLOCK_ALLOCATED 0x10
-#define BDRV_BLOCK_EOF 0x20
-#define BDRV_BLOCK_RECURSE 0x40
-
-typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
-
-typedef struct BDRVReopenState {
- BlockDriverState *bs;
- int flags;
- BlockdevDetectZeroesOptions detect_zeroes;
- bool backing_missing;
- BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
- BlockDriverState *old_file_bs; /* keep pointer for permissions update */
- QDict *options;
- QDict *explicit_options;
- void *opaque;
-} BDRVReopenState;
-
-/*
- * Block operation types
- */
-typedef enum BlockOpType {
- BLOCK_OP_TYPE_BACKUP_SOURCE,
- BLOCK_OP_TYPE_BACKUP_TARGET,
- BLOCK_OP_TYPE_CHANGE,
- BLOCK_OP_TYPE_COMMIT_SOURCE,
- BLOCK_OP_TYPE_COMMIT_TARGET,
- BLOCK_OP_TYPE_DATAPLANE,
- BLOCK_OP_TYPE_DRIVE_DEL,
- BLOCK_OP_TYPE_EJECT,
- BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
- BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
- BLOCK_OP_TYPE_MIRROR_SOURCE,
- BLOCK_OP_TYPE_MIRROR_TARGET,
- BLOCK_OP_TYPE_RESIZE,
- BLOCK_OP_TYPE_STREAM,
- BLOCK_OP_TYPE_REPLACE,
- BLOCK_OP_TYPE_MAX,
-} BlockOpType;
-
-/* Block node permission constants */
-enum {
- /**
- * A user that has the "permission" of consistent reads is guaranteed that
- * their view of the contents of the block device is complete and
- * self-consistent, representing the contents of a disk at a specific
- * point.
- *
- * For most block devices (including their backing files) this is true, but
- * the property cannot be maintained in a few situations like for
- * intermediate nodes of a commit block job.
- */
- BLK_PERM_CONSISTENT_READ = 0x01,
-
- /** This permission is required to change the visible disk contents. */
- BLK_PERM_WRITE = 0x02,
-
- /**
- * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
- * required for writes to the block node when the caller promises that
- * the visible disk content doesn't change.
- *
- * As the BLK_PERM_WRITE permission is strictly stronger, either is
- * sufficient to perform an unchanging write.
- */
- BLK_PERM_WRITE_UNCHANGED = 0x04,
-
- /** This permission is required to change the size of a block node. */
- BLK_PERM_RESIZE = 0x08,
-
- /**
- * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
- * 6.1 and earlier may still lock the corresponding byte in block/file-posix
- * locking. So, implementing some new permission should be very careful to
- * not interfere with this old unused thing.
- */
-
- BLK_PERM_ALL = 0x0f,
-
- DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
- | BLK_PERM_WRITE
- | BLK_PERM_WRITE_UNCHANGED
- | BLK_PERM_RESIZE,
-
- DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
-};
-
-/*
- * Flags that parent nodes assign to child nodes to specify what kind of
- * role(s) they take.
- *
- * At least one of DATA, METADATA, FILTERED, or COW must be set for
- * every child.
- */
-enum BdrvChildRoleBits {
- /*
- * This child stores data.
- * Any node may have an arbitrary number of such children.
- */
- BDRV_CHILD_DATA = (1 << 0),
-
- /*
- * This child stores metadata.
- * Any node may have an arbitrary number of metadata-storing
- * children.
- */
- BDRV_CHILD_METADATA = (1 << 1),
-
- /*
- * A child that always presents exactly the same visible data as
- * the parent, e.g. by virtue of the parent forwarding all reads
- * and writes.
- * This flag is mutually exclusive with DATA, METADATA, and COW.
- * Any node may have at most one filtered child at a time.
- */
- BDRV_CHILD_FILTERED = (1 << 2),
-
- /*
- * Child from which to read all data that isn't allocated in the
- * parent (i.e., the backing child); such data is copied to the
- * parent through COW (and optionally COR).
- * This field is mutually exclusive with DATA, METADATA, and
- * FILTERED.
- * Any node may have at most one such backing child at a time.
- */
- BDRV_CHILD_COW = (1 << 3),
-
- /*
- * The primary child. For most drivers, this is the child whose
- * filename applies best to the parent node.
- * Any node may have at most one primary child at a time.
- */
- BDRV_CHILD_PRIMARY = (1 << 4),
-
- /* Useful combination of flags */
- BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
- | BDRV_CHILD_METADATA
- | BDRV_CHILD_PRIMARY,
-};
-
-/* Mask of BdrvChildRoleBits values */
-typedef unsigned int BdrvChildRole;
-
-char *bdrv_perm_names(uint64_t perm);
-uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
-
-void bdrv_init(void);
-void bdrv_init_with_whitelist(void);
-bool bdrv_uses_whitelist(void);
-int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
-BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix,
- Error **errp);
-BlockDriver *bdrv_find_format(const char *format_name);
-int bdrv_create(BlockDriver *drv, const char* filename,
- QemuOpts *opts, Error **errp);
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
-
-BlockDriverState *bdrv_new(void);
-int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
- Error **errp);
-int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
- Error **errp);
-int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
- Error **errp);
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
- int flags, Error **errp);
-int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
-
-int bdrv_parse_aio(const char *mode, int *flags);
-int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-int bdrv_parse_discard_flags(const char *mode, int *flags);
-BdrvChild *bdrv_open_child(const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState* parent,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- bool allow_none, Error **errp);
-BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
- const char *bdref_key, Error **errp);
-BlockDriverState *bdrv_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
- const char *node_name,
- QDict *options, int flags,
- Error **errp);
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
- int flags, Error **errp);
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, QDict *options,
- bool keep_old_opts);
-void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
-int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
- Error **errp);
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
- Error **errp);
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
- int64_t bytes);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
- const void *buf, int64_t bytes);
-/*
- * Efficiently zero a region of the disk image. Note that this is a regular
- * I/O request like read or write and should have a reasonable size. This
- * function is not suitable for zeroing the entire image in a single request
- * because it may allocate memory for the entire region.
- */
-int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
- const char *backing_file);
-void bdrv_refresh_filename(BlockDriverState *bs);
-
-int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags,
- Error **errp);
-int generated_co_wrapper
-bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-
-int64_t bdrv_nb_sectors(BlockDriverState *bs);
-int64_t bdrv_getlength(BlockDriverState *bs);
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
-BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
- BlockDriverState *in_bs, Error **errp);
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
-void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
-int bdrv_commit(BlockDriverState *bs);
-int bdrv_make_empty(BdrvChild *c, Error **errp);
-int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
- const char *backing_fmt, bool warn);
-void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
- const char *backing_file_str);
-BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
- BlockDriverState *bs);
-BlockDriverState *bdrv_find_base(BlockDriverState *bs);
-bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
- Error **errp);
-void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
-int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
-void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
-
-
-typedef struct BdrvCheckResult {
- int corruptions;
- int leaks;
- int check_errors;
- int corruptions_fixed;
- int leaks_fixed;
- int64_t image_end_offset;
- BlockFragInfo bfi;
-} BdrvCheckResult;
-
-typedef enum {
- BDRV_FIX_LEAKS = 1,
- BDRV_FIX_ERRORS = 2,
-} BdrvCheckMode;
-
-int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix);
-
-/* The units of offset and total_work_size may be chosen arbitrarily by the
- * block driver; total_work_size may change during the course of the amendment
- * operation */
-typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
- int64_t total_work_size, void *opaque);
-int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
- bool force,
- Error **errp);
-
-/* check if a named node can be replaced when doing drive-mirror */
-BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
- const char *node_name, Error **errp);
-
-/* async block I/O */
-void bdrv_aio_cancel(BlockAIOCB *acb);
-void bdrv_aio_cancel_async(BlockAIOCB *acb);
-
-/* sg packet commands */
-int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
-
-/* Invalidate any cached metadata used by image formats */
-int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
- Error **errp);
-void bdrv_invalidate_cache_all(Error **errp);
-int bdrv_inactivate_all(void);
-
-/* Ensure contents are flushed to disk. */
-int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
-void bdrv_close_all(void);
-void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
-void bdrv_drain_all_begin(void);
-void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
-
-#define BDRV_POLL_WHILE(bs, cond) ({ \
- BlockDriverState *bs_ = (bs); \
- AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
- cond); })
-
-int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
- int64_t bytes);
-int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
-int bdrv_has_zero_init_1(BlockDriverState *bs);
-int bdrv_has_zero_init(BlockDriverState *bs);
-bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
-int bdrv_block_status(BlockDriverState *bs, int64_t offset,
- int64_t bytes, int64_t *pnum, int64_t *map,
- BlockDriverState **file);
-int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- bool include_base, int64_t offset, int64_t bytes,
- int64_t *pnum);
-int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
- int64_t bytes);
-
-bool bdrv_is_read_only(BlockDriverState *bs);
-int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
- bool ignore_allow_rdw, Error **errp);
-int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
- Error **errp);
-bool bdrv_is_writable(BlockDriverState *bs);
-bool bdrv_is_sg(BlockDriverState *bs);
-bool bdrv_is_inserted(BlockDriverState *bs);
-void bdrv_lock_medium(BlockDriverState *bs, bool locked);
-void bdrv_eject(BlockDriverState *bs, bool eject_flag);
-const char *bdrv_get_format_name(BlockDriverState *bs);
-BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
-XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
-BlockDriverState *bdrv_lookup_bs(const char *device,
- const char *node_name,
- Error **errp);
-bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
-BlockDriverState *bdrv_next_node(BlockDriverState *bs);
-BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
-
-typedef struct BdrvNextIterator {
- enum {
- BDRV_NEXT_BACKEND_ROOTS,
- BDRV_NEXT_MONITOR_OWNED,
- } phase;
- BlockBackend *blk;
- BlockDriverState *bs;
-} BdrvNextIterator;
-
-BlockDriverState *bdrv_first(BdrvNextIterator *it);
-BlockDriverState *bdrv_next(BdrvNextIterator *it);
-void bdrv_next_cleanup(BdrvNextIterator *it);
-
-BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
-bool bdrv_supports_compressed_writes(BlockDriverState *bs);
-void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque, bool read_only);
-const char *bdrv_get_node_name(const BlockDriverState *bs);
-const char *bdrv_get_device_name(const BlockDriverState *bs);
-const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
-int bdrv_get_flags(BlockDriverState *bs);
-int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
- Error **errp);
-BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- int64_t *cluster_offset,
- int64_t *cluster_bytes);
-
-void bdrv_get_backing_filename(BlockDriverState *bs,
- char *filename, int filename_size);
-char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
-char *bdrv_get_full_backing_filename_from_filename(const char *backed,
- const char *backing,
- Error **errp);
-char *bdrv_dirname(BlockDriverState *bs, Error **errp);
-
-int path_has_protocol(const char *path);
-int path_is_absolute(const char *path);
-char *path_combine(const char *base_path, const char *filename);
-
-int generated_co_wrapper
-bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int generated_co_wrapper
-bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size);
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
-
-void bdrv_img_create(const char *filename, const char *fmt,
- const char *base_filename, const char *base_fmt,
- char *options, uint64_t img_size, int flags,
- bool quiet, Error **errp);
-
-/* Returns the alignment in bytes that is required so that no bounce buffer
- * is required throughout the stack */
-size_t bdrv_min_mem_align(BlockDriverState *bs);
-/* Returns optimal alignment in bytes for bounce buffer */
-size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void *qemu_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_blockalign0(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-
-void bdrv_enable_copy_on_read(BlockDriverState *bs);
-void bdrv_disable_copy_on_read(BlockDriverState *bs);
-
-void bdrv_ref(BlockDriverState *bs);
-void bdrv_unref(BlockDriverState *bs);
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
- BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- Error **errp);
-
-bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
-void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
-void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
-void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
-bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-
-#define BLKDBG_EVENT(child, evt) \
- do { \
- if (child) { \
- bdrv_debug_event(child->bs, evt); \
- } \
- } while (0)
-
-void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
-
-int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
- const char *tag);
-int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
-int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
-bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-
-/**
- * bdrv_get_aio_context:
+ * QEMU System Emulator block driver
*
- * Returns: the currently bound #AioContext
- */
-AioContext *bdrv_get_aio_context(BlockDriverState *bs);
-
-/**
- * Move the current coroutine to the AioContext of @bs and return the old
- * AioContext of the coroutine. Increase bs->in_flight so that draining @bs
- * will wait for the operation to proceed until the corresponding
- * bdrv_co_leave().
+ * Copyright (c) 2003 Fabrice Bellard
*
- * Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
- * this will deadlock.
- */
-AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
-
-/**
- * Ends a section started by bdrv_co_enter(). Move the current coroutine back
- * to old_ctx and decrease bs->in_flight again.
- */
-void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
-
-/**
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
- * know we already own the lock of the current AioContext.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
- * May only be called in the main thread.
- */
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
-
-/**
- * Unlocks the AioContext of @bs if it's not the current AioContext.
- */
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
-
-/**
- * Transfer control to @co in the aio context of @bs
- */
-void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
-
-void bdrv_set_aio_context_ignore(BlockDriverState *bs,
- AioContext *new_context, GSList **ignore);
-int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- Error **errp);
-int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp);
-bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
- GSList **ignore, Error **errp);
-bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
- GSList **ignore, Error **errp);
-AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
-AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
-
-int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
-int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
-
-void bdrv_io_plug(BlockDriverState *bs);
-void bdrv_io_unplug(BlockDriverState *bs);
-
-/**
- * bdrv_parent_drained_begin_single:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
*
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
-
-/**
- * bdrv_parent_drained_end_single:
- *
- * End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
- */
-void bdrv_parent_drained_end_single(BdrvChild *c);
-
-/**
- * bdrv_drain_poll:
- *
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
- *
- * If @ignore_bds_parents is true, parents that are BlockDriverStates must
- * ignore the drain request because they will be drained separately (used for
- * drain_all).
- *
- * This is part of bdrv_drained_begin.
- */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
-
-/**
- * bdrv_drained_begin:
- *
- * Begin a quiesced section for exclusive access to the BDS, by disabling
- * external request sources including NBD server, block jobs, and device model.
- *
- * This function can be recursive.
- */
-void bdrv_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_do_drained_begin_quiesce:
- *
- * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
- * running requests to complete.
- */
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents);
-
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end:
- *
- * End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled. On one hand, that may result in graph changes. On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
- */
-void bdrv_drained_end(BlockDriverState *bs);
-
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced. Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles. The caller must poll until it reaches 0. The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
-void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
-void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
-
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
- uint32_t granularity, Error **errp);
-/**
- *
- * bdrv_register_buf/bdrv_unregister_buf:
- *
- * Register/unregister a buffer for I/O. For example, VFIO drivers are
- * interested to know the memory areas that would later be used for I/O, so
- * that they can prepare IOMMU mapping etc., to get better performance.
- */
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+#ifndef BLOCK_H
+#define BLOCK_H
-/**
- *
- * bdrv_co_copy_range:
- *
- * Do offloaded copy between two children. If the operation is not implemented
- * by the driver, or if the backend storage doesn't support it, a negative
- * error code will be returned.
- *
- * Note: block layer doesn't emulate or fallback to a bounce buffer approach
- * because usually the caller shouldn't attempt offloaded copy any more (e.g.
- * calling copy_file_range(2)) after the first error, thus it should fall back
- * to a read+write path in the caller level.
- *
- * @src: Source child to copy data from
- * @src_offset: offset in @src image to read data
- * @dst: Destination child to copy data to
- * @dst_offset: offset in @dst image to write data
- * @bytes: number of bytes to copy
- * @flags: request flags. Supported flags:
- * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
- * write on @dst as if bdrv_co_pwrite_zeroes is
- * called. Used to simplify caller code, or
- * during BlockDriver.bdrv_co_copy_range_from()
- * recursion.
- * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
- * requests currently in flight.
- *
- * Returns: 0 if succeeded; negative error code if failed.
- **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
+#include "block-global-state.h"
+#include "block-io.h"
-void bdrv_cancel_in_flight(BlockDriverState *bs);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
-#endif
+#endif /* BLOCK_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
new file mode 100644
index 0000000000..5a04c778e4
--- /dev/null
+++ b/include/block/block_int-common.h
@@ -0,0 +1,1222 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_COMMON_H
+#define BLOCK_INT_COMMON_H
+
+#include "block/accounting.h"
+#include "block/block.h"
+#include "block/aio-wait.h"
+#include "qemu/queue.h"
+#include "qemu/coroutine.h"
+#include "qemu/stats64.h"
+#include "qemu/timer.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+#include "qemu/throttle.h"
+#include "qemu/rcu.h"
+
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_HWVERSION "hwversion"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+#define BLOCK_OPT_REDUNDANCY "redundancy"
+#define BLOCK_OPT_NOCOW "nocow"
+#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
+#define BLOCK_OPT_OBJECT_SIZE "object_size"
+#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
+#define BLOCK_OPT_DATA_FILE "data_file"
+#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
+#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
+#define BLOCK_OPT_EXTL2 "extended_l2"
+
+#define BLOCK_PROBE_BUF_SIZE 512
+
+enum BdrvTrackedRequestType { /* used as BdrvTrackedRequest.type */
+ BDRV_TRACKED_READ,
+ BDRV_TRACKED_WRITE,
+ BDRV_TRACKED_DISCARD,
+ BDRV_TRACKED_TRUNCATE,
+};
+
+/*
+ * It is not ideal that the BdrvTrackedRequest structure is public,
+ * as block/io.c is very careful about incoming offset/bytes being
+ * correct. Be sure to assert that bdrv_check_request() succeeded after
+ * any modification of a BdrvTrackedRequest object outside block/io.c.
+ */
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t offset;
+ int64_t bytes;
+ enum BdrvTrackedRequestType type;
+
+ bool serialising;
+ int64_t overlap_offset;
+ int64_t overlap_bytes;
+
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+
+ struct BdrvTrackedRequest *waiting_for;
+} BdrvTrackedRequest;
+
+
+struct BlockDriver {
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards.
+ */
+
+ const char *format_name;
+ int instance_size;
+
+ /*
+ * Set to true if the BlockDriver is a block filter. Block filters pass
+ * certain callbacks that refer to data (see block.c) to their bs->file
+ * or bs->backing (whichever one exists) if the driver doesn't implement
+ * them. Drivers that do not wish to forward must implement them and return
+ * -ENOTSUP.
+ * Note that filters are not allowed to modify data.
+ *
+ * Filters generally cannot have more than a single filtered child,
+ * because the data they present must at all times be the same as
+ * that on their filtered child. That would be impossible to
+ * achieve for multiple filtered children.
+ * (And this filtered child must then be bs->file or bs->backing.)
+ */
+ bool is_filter;
+ /*
+ * Set to true if the BlockDriver is a format driver. Format nodes
+ * generally do not expect their children to be other format nodes
+ * (except for backing files), and so format probing is disabled
+ * on those children.
+ */
+ bool is_format;
+
+ /*
+ * Drivers not implementing bdrv_parse_filename nor bdrv_open should have
+ * this field set to true, except ones that are defined only by their
+ * child's bs.
+ * An example of the last type will be the quorum block driver.
+ */
+ bool bdrv_needs_filename;
+
+ /*
+ * Set if a driver can support backing files. This also implies the
+ * following semantics:
+ *
+ * - Return status 0 of .bdrv_co_block_status means that corresponding
+ * blocks are not allocated in this layer of backing-chain
+ * - For such (unallocated) blocks, read will:
+ * - fill buffer with zeros if there is no backing file
+ * - read from the backing file otherwise, where the block layer
+ * takes care of reading zeros beyond EOF if backing file is short
+ */
+ bool supports_backing;
+
+ bool has_variable_length;
+
+ /*
+ * Drivers setting this field must be able to work with just a plain
+ * filename with '<protocol_name>:' as a prefix, and no other options.
+ * Options may be extracted from the filename by implementing
+ * bdrv_parse_filename.
+ */
+ const char *protocol_name;
+
+ /* List of options for creating images, terminated by name == NULL */
+ QemuOptsList *create_opts;
+
+ /* List of options for image amend */
+ QemuOptsList *amend_opts;
+
+ /*
+ * If this driver supports reopening images this contains a
+ * NULL-terminated list of the runtime options that can be
+ * modified. If an option in this list is unspecified during
+ * reopen then it _must_ be reset to its default value or return
+ * an error.
+ */
+ const char *const *mutable_opts;
+
+ /*
+ * Pointer to a NULL-terminated array of names of strong options
+ * that can be specified for bdrv_open(). A strong option is one
+ * that changes the data of a BDS.
+ * If this pointer is NULL, the array is considered empty.
+ * "filename" and "driver" are always considered strong.
+ */
+ const char *const *strong_runtime_opts;
+
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * This function is invoked under BQL before .bdrv_co_amend()
+ * (which in contrast does not necessarily run under the BQL)
+ * to allow driver-specific initialization code that requires
+ * the BQL, like setting up specific permission flags.
+ */
+ int (*bdrv_amend_pre_run)(BlockDriverState *bs, Error **errp);
+ /*
+ * This function is invoked under BQL after .bdrv_co_amend()
+ * to allow cleaning up what was done in .bdrv_amend_pre_run().
+ */
+ void (*bdrv_amend_clean)(BlockDriverState *bs);
+
+ /*
+ * Return true if @to_replace can be replaced by a BDS with the
+ * same data as @bs without it affecting @bs's behavior (that is,
+ * without it being visible to @bs's parents).
+ */
+ bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+ int (*bdrv_probe_device)(const char *filename);
+
+ /*
+ * Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation.
+ */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options,
+ Error **errp);
+
+ /* For handling image reopen for split or non-split files. */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+ void (*bdrv_join_options)(QDict *options, QDict *old_options);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+
+ /* Protocol drivers should implement this instead of bdrv_open */
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp);
+ void (*bdrv_close)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
+ Error **errp);
+ int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+ int (*bdrv_amend_options)(BlockDriverState *bs,
+ QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque,
+ bool force,
+ Error **errp);
+
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+
+ /*
+ * Refreshes the bs->exact_filename field. If that is impossible,
+ * bs->exact_filename has to be left empty.
+ */
+ void (*bdrv_refresh_filename)(BlockDriverState *bs);
+
+ /*
+ * Gathers the open options for all children into @target.
+ * A simple format driver (without backing file support) might
+ * implement this function like this:
+ *
+ * QINCREF(bs->file->bs->full_open_options);
+ * qdict_put(target, "file", bs->file->bs->full_open_options);
+ *
+ * If not specified, the generic implementation will simply put
+ * all children's options under their respective name.
+ *
+ * @backing_overridden is true when bs->backing seems not to be
+ * the child that would result from opening bs->backing_file.
+ * Therefore, if it is true, the backing child's options should be
+ * gathered; otherwise, there is no need since the backing child
+ * is the one implied by the image header.
+ *
+ * Note that ideally this function would not be needed. Every
+ * block driver which implements it is probably doing something
+ * shady regarding its runtime option structure.
+ */
+ void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
+ bool backing_overridden);
+
+ /*
+ * Returns an allocated string which is the directory name of this BDS: It
+ * will be used to make relative filenames absolute by prepending this
+ * function's return value to them.
+ */
+ char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * This informs the driver that we are no longer interested in the result
+ * of in-flight requests, so don't waste the time if possible.
+ *
+ * One example usage is to avoid waiting for an nbd target node reconnect
+ * timeout during job-cancel with force=true.
+ */
+ void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
+
+ int (*bdrv_inactivate)(BlockDriverState *bs);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_id,
+ const char *name,
+ Error **errp);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ /*
+ * Remove fd handlers, timers, and other event loop callbacks so the event
+ * loop is no longer in use. Called with no in-flight requests and in
+ * depth-first traversal order with parents before child nodes.
+ */
+ void (*bdrv_detach_aio_context)(BlockDriverState *bs);
+
+ /*
+ * Add fd handlers, timers, and other event loop callbacks so I/O requests
+ * can be processed again. Called with no in-flight requests and in
+ * depth-first traversal order with child nodes before parent nodes.
+ */
+ void (*bdrv_attach_aio_context)(BlockDriverState *bs,
+ AioContext *new_context);
+
+ /**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz and return zero.
+ * On failure, return negative errno.
+ */
+ int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
+ /**
+ * Try to get @bs's geometry (cyls, heads, sectors)
+ * On success, store them in @geo and return 0.
+ * On failure return -errno.
+ * Only drivers that want to override guest geometry implement this
+ * callback; see hd_geometry_guess().
+ */
+ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
+
+ void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+ void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
+ Error **errp);
+
+ /**
+ * Informs the block driver that a permission change is intended. The
+ * driver checks whether the change is permissible and may take other
+ * preparations for the change (e.g. get file system locks). This operation
+ * is always followed by a call to either .bdrv_set_perm or
+ * .bdrv_abort_perm_update.
+ *
+ * Checks whether the requested set of cumulative permissions in @perm
+ * can be granted for accessing @bs and whether no other users are using
+ * permissions other than those given in @shared (both arguments take
+ * BLK_PERM_* bitmasks).
+ *
+ * If both conditions are met, 0 is returned. Otherwise, -errno is returned
+ * and errp is set to an error describing the conflict.
+ */
+ int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared, Error **errp);
+
+ /**
+ * Called to inform the driver that the cumulative set of used
+ * permissions for @bs has changed to @perm, and the set of sharable
+ * permissions to @shared. The driver can use this to propagate changes to
+ * its children (i.e. request permissions only if a parent actually needs
+ * them).
+ *
+ * This function is only invoked after bdrv_check_perm(), so block drivers
+ * may rely on preparations made in their .bdrv_check_perm implementation.
+ */
+ void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
+
+ /*
+ * Called to inform the driver that after a previous bdrv_check_perm()
+ * call, the permission update is not performed and any preparations made
+ * for it (e.g. taken file locks) need to be undone.
+ *
+ * This function can be called even for nodes that never saw a
+ * bdrv_check_perm() call. It is a no-op then.
+ */
+ void (*bdrv_abort_perm_update)(BlockDriverState *bs);
+
+ /**
+ * Returns in @nperm and @nshared the permissions that the driver for @bs
+ * needs on its child @c, based on the cumulative permissions requested by
+ * the parents in @parent_perm and @parent_shared.
+ *
+ * If @c is NULL, return the permissions for attaching a new child for the
+ * given @child_class and @role.
+ *
+ * If @reopen_queue is non-NULL, don't return the currently needed
+ * permissions, but those that will be needed after applying the
+ * @reopen_queue.
+ */
+ void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role,
+ BlockReopenQueue *reopen_queue,
+ uint64_t parent_perm, uint64_t parent_shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+ /**
+ * Register/unregister a buffer for I/O. For example, when the driver is
+ * interested to know the memory areas that will later be used in iovs, so
+ * that it can do IOMMU mapping with VFIO etc., in order to get better
+ * performance. In the case of VFIO drivers, this callback is used to do
+ * DMA mapping for hot buffers.
+ */
+ void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+
+ /*
+ * This field is modified only under the BQL, and is part of
+ * the global state.
+ */
+ QLIST_ENTRY(BlockDriver) list;
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+
+ int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
+ BlockdevAmendOptions *opts,
+ bool force,
+ Error **errp);
+
+ /* aio */
+ BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockCompletionFunc *cb, void *opaque);
+ BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int bytes,
+ BlockCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+
+ /**
+ * @offset: position in bytes to read at
+ * @bytes: number of bytes to read
+ * @qiov: the buffers to fill with read data
+ * @flags: currently unused, always 0
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ int flags);
+ /**
+ * @offset: position in bytes to write at
+ * @bytes: number of bytes to write
+ * @qiov: the buffers containing data to write
+ * @flags: zero or more bits allowed by 'supported_write_flags'
+ *
+ * @offset and @bytes will be a multiple of 'request_alignment',
+ * but the length of individual @qiov elements does not have to
+ * be a multiple.
+ *
+ * @bytes will always equal the total size of @qiov, and will be
+ * no larger than 'max_transfer'.
+ *
+ * The buffer in @qiov may point directly to guest memory.
+ */
+ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL or return -ENOTSUP and .bdrv_co_writev()
+ * will be called instead.
+ */
+ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+ /*
+ * Map [offset, offset + bytes) range onto a child of @bs to copy from,
+ * and invoke bdrv_co_copy_range_from(child, ...), or invoke
+ * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Map [dst_offset, dst_offset + bytes) range onto a child of @bs to copy data to,
+ * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
+ * operation if @bs is the leaf and @src has the same BlockDriver. Return
+ * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
+ *
+ * See the comment of bdrv_co_copy_range for the parameter and return value
+ * semantics.
+ */
+ int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
+ BdrvChild *src,
+ int64_t src_offset,
+ BdrvChild *dst,
+ int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+ /*
+ * Building block for bdrv_block_status[_above] and
+ * bdrv_is_allocated[_above]. The driver should answer only
+ * according to the current layer, and should only need to set
+ * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
+ * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
+ * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
+ * block.h for the overall meaning of the bits. As a hint, the
+ * flag want_zero is true if the caller cares more about precise
+ * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
+ * overall allocation (favor larger *pnum, perhaps by reporting
+ * _DATA instead of _ZERO). The block layer guarantees input
+ * clamped to bdrv_getlength() and aligned to request_alignment,
+ * as well as non-NULL pnum, map, and file; in turn, the driver
+ * must return an error or set pnum to an aligned non-zero value.
+ *
+ * Note that @bytes is just a hint on how big of a region the
+ * caller wants to inspect. It is not a limit on *pnum.
+ * Implementations are free to return larger values of *pnum if
+ * doing so does not incur a performance penalty.
+ *
+ * block/io.c's bdrv_co_block_status() will utilize an unclamped
+ * *pnum value for the block-status cache on protocol nodes, prior
+ * to clamping *pnum for return to its caller.
+ */
+ int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data for all layers by calling bdrv_co_flush for underlying
+ * layers, if needed. This function is needed for deterministic
+ * synchronization of the flush finishing callback.
+ */
+ int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+
+ /* Delete a created file. */
+ int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
+ Error **errp);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example file-posix.c calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ /*
+ * Truncate @bs to @offset bytes using the given @prealloc mode
+ * when growing. Modes other than PREALLOC_MODE_OFF should be
+ * rejected when shrinking @bs.
+ *
+ * If @exact is true, @bs must be resized to exactly @offset.
+ * Otherwise, it is sufficient for @bs (if it is a host block
+ * device and thus there is no way to resize it) to be at least
+ * @offset bytes in length.
+ *
+ * If @exact is true and this function fails but would succeed
+ * with @exact = false, it should return -ENOTSUP.
+ */
+ int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
+ bool exact, PreallocMode prealloc,
+ BdrvRequestFlags flags, Error **errp);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
+ Error **errp);
+
+ int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset);
+
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
+ Error **errp);
+ BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
+
+ int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+ int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+
+ /* removable device specific */
+ bool (*bdrv_is_inserted)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+ int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf);
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
+ BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
+
+ /* io queue for linux-aio */
+ void (*bdrv_io_plug)(BlockDriverState *bs);
+ void (*bdrv_io_unplug)(BlockDriverState *bs);
+
+ /**
+ * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * drain operation to drain and stop any internal sources of requests in
+ * the driver.
+ * bdrv_co_drain_end is called if implemented at the end of the drain.
+ *
+ * They should be used by the driver to e.g. manage scheduled I/O
+ * requests, or toggle an internal state. After the end of the drain new
+ * requests will continue normally.
+ */
+ void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
+ void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+
+ bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
+ bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ uint32_t granularity,
+ Error **errp);
+ int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
+};
+
+static inline bool block_driver_can_compress(BlockDriver *drv)
+{
+ return drv->bdrv_co_pwritev_compressed ||
+ drv->bdrv_co_pwritev_compressed_part;
+}
+
+typedef struct BlockLimits {
+ /*
+ * Alignment requirement, in bytes, for offset/length of I/O
+ * requests. Must be a power of 2 less than INT_MAX; defaults to
+ * 1 for drivers with modern byte interfaces, and to 512
+ * otherwise.
+ */
+ uint32_t request_alignment;
+
+ /*
+ * Maximum number of bytes that can be discarded at once. Must be multiple
+ * of pdiscard_alignment, but need not be power of 2. May be 0 if no
+ * inherent 64-bit limit.
+ */
+ int64_t max_pdiscard;
+
+ /*
+ * Optimal alignment for discard requests in bytes. A power of 2
+ * is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pdiscard if
+ * that is set. May be 0 if bl.request_alignment is good enough
+ */
+ uint32_t pdiscard_alignment;
+
+ /*
+ * Maximum number of bytes that can be zeroed at once. Must be multiple of
+ * pwrite_zeroes_alignment. 0 means no limit.
+ */
+ int64_t max_pwrite_zeroes;
+
+ /*
+ * Optimal alignment for write zeroes requests in bytes. A power
+ * of 2 is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pwrite_zeroes
+ * if that is set. May be 0 if bl.request_alignment is good
+ * enough
+ */
+ uint32_t pwrite_zeroes_alignment;
+
+ /*
+ * Optimal transfer length in bytes. A power of 2 is best but not
+ * mandatory. Must be a multiple of bl.request_alignment, or 0 if
+ * no preferred size
+ */
+ uint32_t opt_transfer;
+
+ /*
+ * Maximal transfer length in bytes. Need not be power of 2, but
+ * must be multiple of opt_transfer and bl.request_alignment, or 0
+ * for no 32-bit limit. For now, anything larger than INT_MAX is
+ * clamped down.
+ */
+ uint32_t max_transfer;
+
+ /*
+ * Maximal hardware transfer length in bytes. Applies whenever
+ * transfers to the device bypass the kernel I/O scheduler, for
+ * example with SG_IO. If larger than max_transfer or if zero,
+ * blk_get_max_hw_transfer will fall back to max_transfer.
+ */
+ uint64_t max_hw_transfer;
+
+ /*
+ * Maximal number of scatter/gather elements allowed by the hardware.
+ * Applies whenever transfers to the device bypass the kernel I/O
+ * scheduler, for example with SG_IO. If larger than max_iov
+ * or if zero, blk_get_max_hw_iov will fall back to max_iov.
+ */
+ int max_hw_iov;
+
+
+ /* memory alignment, in bytes so that no bounce buffer is needed */
+ size_t min_mem_alignment;
+
+ /* memory alignment, in bytes, for bounce buffer */
+ size_t opt_mem_alignment;
+
+ /* maximum number of iovec elements */
+ int max_iov;
+} BlockLimits;
+
+typedef struct BdrvOpBlocker BdrvOpBlocker;
+
+typedef struct BdrvAioNotifier {
+ void (*attached_aio_context)(AioContext *new_context, void *opaque);
+ void (*detach_aio_context)(void *opaque);
+
+ void *opaque;
+ bool deleted;
+
+ QLIST_ENTRY(BdrvAioNotifier) list;
+} BdrvAioNotifier;
+
+struct BdrvChildClass {
+ /*
+ * If true, bdrv_replace_node() doesn't change the node this BdrvChild
+ * points to.
+ */
+ bool stay_at_node;
+
+ /*
+ * If true, the parent is a BlockDriverState and bdrv_next_all_states()
+ * will return it. This information is used for drain_all, where every node
+ * will be drained separately, so the drain only needs to be propagated to
+ * non-BDS parents.
+ */
+ bool parent_is_bds;
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+ void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
+ int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options);
+ void (*change_media)(BdrvChild *child, bool load);
+
+ /*
+ * Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory.
+ */
+ char *(*get_parent_desc)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the child has been activated/inactivated (e.g.
+ * when migration is completing) and it can start/stop requesting
+ * permissions and doing I/O on it.
+ */
+ void (*activate)(BdrvChild *child, Error **errp);
+ int (*inactivate)(BdrvChild *child);
+
+ void (*attach)(BdrvChild *child);
+ void (*detach)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference.
+ */
+ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+ const char *filename, Error **errp);
+
+ bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GSList **ignore, Error **errp);
+ void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
+
+ AioContext *(*get_parent_aio_context)(BdrvChild *child);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ void (*resize)(BdrvChild *child);
+
+ /*
+ * Returns a name that is supposedly more useful for human users than the
+ * node name for identifying the node in question (in particular, a BB
+ * name), or NULL if the parent can't provide a better name.
+ */
+ const char *(*get_name)(BdrvChild *child);
+
+ /*
+ * If this pair of functions is implemented, the parent doesn't issue new
+ * requests after returning from .drained_begin() until .drained_end() is
+ * called.
+ *
+ * These functions must not change the graph (and therefore also must not
+ * call aio_poll(), which could change the graph indirectly).
+ *
+ * If drained_end() schedules background operations, it must atomically
+ * increment *drained_end_counter for each such operation and atomically
+ * decrement it once the operation has settled.
+ *
+ * Note that this can be nested. If drained_begin() was called twice, new
+ * I/O is allowed only after drained_end() was called twice, too.
+ */
+ void (*drained_begin)(BdrvChild *child);
+ void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+
+ /*
+ * Returns whether the parent has pending requests for the child. This
+ * callback is polled after .drained_begin() has been called until all
+ * activity on the child has stopped.
+ */
+ bool (*drained_poll)(BdrvChild *child);
+};
+
+extern const BdrvChildClass child_of_bds;
+
+struct BdrvChild {
+ BlockDriverState *bs;
+ char *name;
+ const BdrvChildClass *klass;
+ BdrvChildRole role;
+ void *opaque;
+
+ /**
+ * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
+ */
+ uint64_t perm;
+
+ /**
+ * Permissions that can still be granted to other users of @bs while this
+ * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
+ */
+ uint64_t shared_perm;
+
+ /*
+ * This link is frozen: the child can neither be replaced nor
+ * detached from the parent.
+ */
+ bool frozen;
+
+ /*
+ * How many times the parent of this child has been drained
+ * (through klass->drained_*).
+ * Usually, this is equal to bs->quiesce_counter (potentially
+ * reduced by bdrv_drain_all_count). It may differ while the
+ * child is entering or leaving a drained section.
+ */
+ int parent_quiesce_counter;
+
+ QLIST_ENTRY(BdrvChild) next;
+ QLIST_ENTRY(BdrvChild) next_parent;
+};
+
+/*
+ * Allows bdrv_co_block_status() to cache one data region for a
+ * protocol node.
+ *
+ * @valid: Whether the cache is valid (should be accessed with atomic
+ * functions so this can be reset by RCU readers)
+ * @data_start: Offset at which we know (or strongly assume) there is data
+ * @data_end: Offset where the data region ends (which is not necessarily
+ * the start of a zeroed region)
+ */
+typedef struct BdrvBlockStatusCache {
+ struct rcu_head rcu;
+
+ bool valid;
+ int64_t data_start;
+ int64_t data_end;
+} BdrvBlockStatusCache;
+
+struct BlockDriverState {
+ /*
+ * Protected by big QEMU lock or read-only after opening. No special
+ * locking needed during I/O...
+ */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ bool encrypted; /* if true, the media is encrypted */
+ bool sg; /* if true, the device is a /dev/sg* */
+ bool probed; /* if true, format was probed rather than specified */
+ bool force_share; /* if true, always allow all shared permissions */
+ bool implicit; /* if true, this filter node was automatically inserted */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
+ /*
+ * long-running tasks intended to always use the same AioContext as this
+ * BDS may register themselves in this list to be notified of changes
+ * regarding this BDS's context
+ */
+ QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+ bool walking_aio_notifiers; /* to make removal during iteration safe */
+
+ char filename[PATH_MAX];
+ /*
+ * If not empty, this image is a diff in relation to backing_file.
+ * Note that this is the name given in the image header and
+ * therefore may or may not be equal to .backing->bs->filename.
+ * If this field contains a relative path, it is to be resolved
+ * relatively to the overlay's location.
+ */
+ char backing_file[PATH_MAX];
+ /*
+ * The backing filename indicated by the image header. Contrary
+ * to backing_file, if we ever open this file, auto_backing_file
+ * is replaced by the resulting BDS's filename (i.e. after a
+ * bdrv_refresh_filename() run).
+ */
+ char auto_backing_file[PATH_MAX];
+ char backing_format[16]; /* if non-zero and backing_file exists */
+
+ QDict *full_open_options;
+ char exact_filename[PATH_MAX];
+
+ BdrvChild *backing;
+ BdrvChild *file;
+
+ /* I/O Limits */
+ BlockLimits bl;
+
+ /*
+ * Flags honored during pread
+ */
+ unsigned int supported_read_flags;
+ /*
+ * Flags honored during pwrite (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_WRITE_UNCHANGED).
+ * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
+ * writes will be issued as normal writes without the flag set.
+ * This is important to note for drivers that do not explicitly
+ * request a WRITE permission for their children and instead take
+ * the same permissions as their parent did (this is commonly what
+ * block filters do). Such drivers have to be aware that the
+ * parent may have taken a WRITE_UNCHANGED permission only and is
+ * issuing such requests. Drivers either must make sure that
+ * these requests do not result in plain WRITE accesses (usually
+ * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
+ * every incoming write request as-is, including potentially that
+ * flag), or they have to explicitly take the WRITE permission for
+ * their children.
+ */
+ unsigned int supported_write_flags;
+ /*
+ * Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
+ */
+ unsigned int supported_zero_flags;
+ /*
+ * Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
+ *
+ * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
+ * that any added space reads as all zeros. If this can't be guaranteed,
+ * the operation must fail.
+ */
+ unsigned int supported_truncate_flags;
+
+ /* the following member gives a name to every node on the bs graph. */
+ char node_name[32];
+ /* element of the list of named nodes building the graph */
+ QTAILQ_ENTRY(BlockDriverState) node_list;
+ /* element of the list of all BlockDriverStates (all_bdrv_states) */
+ QTAILQ_ENTRY(BlockDriverState) bs_list;
+ /* element of the list of monitor-owned BDS */
+ QTAILQ_ENTRY(BlockDriverState) monitor_list;
+ int refcnt;
+
+ /* operation blockers. Protected by BQL. */
+ QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
+
+ /*
+ * The node that this node inherited default options from (and a reopen on
+ * which can affect this node by changing these defaults). This is always a
+ * parent node of this node.
+ */
+ BlockDriverState *inherits_from;
+ QLIST_HEAD(, BdrvChild) children;
+ QLIST_HEAD(, BdrvChild) parents;
+
+ QDict *options;
+ QDict *explicit_options;
+ BlockdevDetectZeroesOptions detect_zeroes;
+
+ /* The error object in use for blocking operations on backing_hd */
+ Error *backing_blocker;
+
+ /* Protected by AioContext lock */
+
+ /*
+ * If we are reading a disk image, give its size in sectors.
+ * Generally read-only; it is written to by load_snapshot and
+ * save_snapshot, but the block layer is quiescent during those.
+ */
+ int64_t total_sectors;
+
+ /* threshold limit for writes, in bytes. "High water mark". */
+ uint64_t write_threshold_offset;
+
+ /*
+ * Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
+ * Reading from the list can be done with either the BQL or the
+ * dirty_bitmap_mutex. Modifying a bitmap only requires
+ * dirty_bitmap_mutex.
+ */
+ QemuMutex dirty_bitmap_mutex;
+ QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
+
+ /* Offset after the highest byte written to */
+ Stat64 wr_highest_offset;
+
+ /*
+ * If true, copy read backing sectors into image. Can be >1 if more
+ * than one client has requested copy-on-read. Accessed with atomic
+ * ops.
+ */
+ int copy_on_read;
+
+ /*
+ * number of in-flight requests; overall and serialising.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ unsigned int serialising_in_flight;
+
+ /*
+ * counter for nested bdrv_io_plug.
+ * Accessed with atomic ops.
+ */
+ unsigned io_plugged;
+
+ /* do we need to tell the guest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* Accessed with atomic ops. */
+ int quiesce_counter;
+ int recursive_quiesce_counter;
+
+ unsigned int write_gen; /* Current data generation */
+
+ /* Protected by reqs_lock. */
+ CoMutex reqs_lock;
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+ CoQueue flush_queue; /* Serializing flush queue */
+ bool active_flush_req; /* Flush request in flight? */
+
+ /* Only read/written by whoever has set active_flush_req to true. */
+ unsigned int flushed_gen; /* Flushed write generation */
+
+ /* BdrvChild links to this node may never be frozen */
+ bool never_freeze;
+
+ /* Lock for block-status cache RCU writers */
+ CoMutex bsc_modify_lock;
+ /* Always non-NULL, but must only be dereferenced under an RCU read guard */
+ BdrvBlockStatusCache *block_status_cache;
+};
+
+struct BlockBackendRootState {
+ int open_flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+};
+
+typedef enum BlockMirrorBackingMode {
+ /*
+ * Reuse the existing backing chain from the source for the target.
+ * - sync=full: Set backing BDS to NULL.
+ * - sync=top: Use source's backing BDS.
+ * - sync=none: Use source as the backing BDS.
+ */
+ MIRROR_SOURCE_BACKING_CHAIN,
+
+ /* Open the target's backing chain completely anew */
+ MIRROR_OPEN_BACKING_CHAIN,
+
+ /* Do not change the target's backing BDS after job completion */
+ MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
+
+/*
+ * Essential block drivers which must always be statically linked into qemu, and
+ * which therefore can be accessed without using bdrv_find_format()
+ */
+extern BlockDriver bdrv_file;
+extern BlockDriver bdrv_raw;
+extern BlockDriver bdrv_qcow2;
+
+extern unsigned int bdrv_drain_all_count;
+extern QemuOptsList bdrv_create_opts_simple;
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
+static inline BlockDriverState *child_bs(BdrvChild *child)
+{
+ return child ? child->bs : NULL;
+}
+
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
+int get_tmp_filename(char *filename, int size);
+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+ QDict *options);
+
+
+int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ Error **errp);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+
+#endif /* BLOCK_INT_COMMON_H */
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
new file mode 100644
index 0000000000..0f21b0570b
--- /dev/null
+++ b/include/block/block_int-global-state.h
@@ -0,0 +1,329 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_GLOBAL_STATE_H
+#define BLOCK_INT_GLOBAL_STATE_H
+
+#include "block_int-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+/**
+ * stream_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @backing_file_str: The file name that will be written to @bs as the
+ * new backing file if the job completes. Ignored if @base is %NULL.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above
+ * @bs. NULL means that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @backing_file_str in the written image and to @base in the live
+ * BlockDriverState.
+ */
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ BlockDriverState *bottom,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error,
+ const char *filter_node_name,
+ Error **errp);
+
+/**
+ * commit_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device.
+ * @top: Top block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @backing_file_str: String to use as the backing file in @top's overlay
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @top. NULL means
+ * that a node name should be autogenerated.
+ * @errp: Error object.
+ *
+ */
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top,
+ int creation_flags, int64_t speed,
+ BlockdevOnError on_error, const char *backing_file_str,
+ const char *filter_node_name, Error **errp);
+/**
+ * commit_active_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Active block device to be committed.
+ * @base: Block device that will be written into, and become the new top.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @auto_complete: Auto complete the job.
+ * @errp: Error object.
+ *
+ */
+BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int creation_flags,
+ int64_t speed, BlockdevOnError on_error,
+ const char *filter_node_name,
+ BlockCompletionFunc *cb, void *opaque,
+ bool auto_complete, Error **errp);
+/**
+ * mirror_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @replaces: Block graph node name to replace once the mirror is done. Can
+ * only be used when full mirroring is selected.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
+ * @zero_target: Whether the target should be explicitly zero-initialized
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @unmap: Whether to unmap target where source sectors only contain zeroes.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the mirror job inserts into the graph above @bs. NULL means that
+ * a node name should be autogenerated.
+ * @copy_mode: When to trigger writes to the target.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
+ int creation_flags, int64_t speed,
+ uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ bool zero_target,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ bool unmap, const char *filter_node_name,
+ MirrorCopyMode copy_mode, Error **errp);
+
+/**
+ * backup_job_create:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
+ * @bitmap_mode: The bitmap synchronization policy to use.
+ * @perf: Performance options. All actual fields assumed to be present,
+ * all ".has_*" fields are ignored.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ * See @BlockJobCreateFlags
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
+ *
+ * Create a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+ bool compress,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ int creation_flags,
+ BlockCompletionFunc *cb, void *opaque,
+ JobTxn *txn, Error **errp);
+
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp);
+void bdrv_root_unref_child(BdrvChild *child);
+
+void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
+ uint64_t *shared_perm);
+
+/**
+ * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
+ * bdrv_child_refresh_perms() instead and make the parent's
+ * .bdrv_child_perm() implementation return the correct values.
+ */
+int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+ Error **errp);
+
+/**
+ * Calls bs->drv->bdrv_child_perm() and updates the child's permission
+ * masks with the result.
+ * Drivers should invoke this function whenever an event occurs that
+ * makes their .bdrv_child_perm() implementation return different
+ * values than before, but which will not result in the block layer
+ * automatically refreshing the permissions.
+ */
+int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
+
+bool bdrv_recurse_can_replace(BlockDriverState *bs,
+ BlockDriverState *to_replace);
+
+/*
+ * Default implementation for BlockDriver.bdrv_child_perm() that can
+ * be used by block filters and image formats, as long as they use the
+ * child_of_bds child class and set an appropriate BdrvChildRole.
+ */
+void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
+ BdrvChildRole role, BlockReopenQueue *reopen_queue,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared);
+
+void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
+bool blk_dev_has_removable_media(BlockBackend *blk);
+void blk_dev_eject_request(BlockBackend *blk, bool force);
+bool blk_dev_is_medium_locked(BlockBackend *blk);
+
+void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
+
+void bdrv_set_monitor_owned(BlockDriverState *bs);
+
+void blockdev_close_all_bdrv_states(void);
+
+BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
+
+/**
+ * Simple implementation of bdrv_co_create_opts for protocol drivers
+ * which only support creation via opening a file
+ * (usually existing raw storage device)
+ */
+int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp);
+
+BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+ const char *name,
+ BlockDriverState **pbs,
+ Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
+ BlockDirtyBitmapMergeSourceList *bms,
+ HBitmap **backup, Error **errp);
+BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
+ bool release,
+ BlockDriverState **bitmap_bs,
+ Error **errp);
+
+
+BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
+
+/**
+ * bdrv_add_aio_context_notifier:
+ *
+ * If a long-running job intends to be always run in the same AioContext as a
+ * certain BDS, it may use this function to be notified of changes regarding the
+ * association of the BDS to an AioContext.
+ *
+ * attached_aio_context() is called after the target BDS has been attached to a
+ * new AioContext; detach_aio_context() is called before the target BDS is being
+ * detached from its old AioContext.
+ */
+void bdrv_add_aio_context_notifier(BlockDriverState *bs,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+
+/**
+ * bdrv_remove_aio_context_notifier:
+ *
+ * Unsubscribe from change notifications regarding the BDS's AioContext. The
+ * parameters given here have to be the same as those given to
+ * bdrv_add_aio_context_notifier().
+ */
+void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
+ void (*aio_context_attached)(AioContext *,
+ void *),
+ void (*aio_context_detached)(void *),
+ void *opaque);
+
+/**
+ * End all quiescent sections started by bdrv_drain_all_begin(). This is
+ * needed when deleting a BDS before bdrv_drain_all_end() is called.
+ *
+ * NOTE: this is an internal helper for bdrv_close() *only*. No one else
+ * should call it.
+ */
+void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
+
+/**
+ * Make sure that the function is running under both drain and BQL.
+ * The latter protects from concurrent writes
+ * from the GS API, while the former prevents concurrent reads
+ * from I/O.
+ */
+static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
+{
+ /*
+ * TODO: this function is incomplete. Because the users of this
+ * assert lack the necessary drains, it checks only for the BQL.
+ * Once the necessary drains are added,
+ * also assert that qatomic_read(&bs->quiesce_counter) > 0
+ */
+ assert(qemu_in_main_thread());
+}
+
+#endif /* BLOCK_INT_GLOBAL_STATE_H */
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
new file mode 100644
index 0000000000..3da5f01c42
--- /dev/null
+++ b/include/block/block_int-io.h
@@ -0,0 +1,185 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_IO_H
+#define BLOCK_INT_IO_H
+
+#include "block_int-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
+
+static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_preadv(child, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
+ int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_CODE();
+
+ return bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
+}
+
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+ uint64_t align);
+BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
+
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+ const char *filename);
+
+/**
+ * bdrv_wakeup:
+ * @bs: The BlockDriverState for which an I/O operation has been completed.
+ *
+ * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
+ * synchronous I/O on a BlockDriverState that is attached to another
+ * I/O thread, the main thread lets the I/O thread's event loop run,
+ * waiting for the I/O operation to complete. A bdrv_wakeup will wake
+ * up the main thread if necessary.
+ *
+ * Manual calls to bdrv_wakeup are rarely necessary, because
+ * bdrv_dec_in_flight already calls it.
+ */
+void bdrv_wakeup(BlockDriverState *bs);
+
+const char *bdrv_get_parent_name(const BlockDriverState *bs);
+bool blk_dev_has_tray(BlockBackend *blk);
+bool blk_dev_is_tray_open(BlockBackend *blk);
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
+bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+ const BdrvDirtyBitmap *src,
+ HBitmap **backup, bool lock);
+
+void bdrv_inc_in_flight(BlockDriverState *bs);
+void bdrv_dec_in_flight(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
+ BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+
+BdrvChild *bdrv_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_child(BlockDriverState *bs);
+BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
+BdrvChild *bdrv_primary_child(BlockDriverState *bs);
+BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
+BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
+
+static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_child(bs));
+}
+
+static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_filter_or_cow_child(bs));
+}
+
+static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
+{
+ IO_CODE();
+ return child_bs(bdrv_primary_child(bs));
+}
+
+/**
+ * Check whether the given offset is in the cached block-status data
+ * region.
+ *
+ * If it is, and @pnum is not NULL, *pnum is set to
+ * `bsc.data_end - offset`, i.e. how many bytes, starting from
+ * @offset, are data (according to the cache).
+ * Otherwise, *pnum is not touched.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
+
+/**
+ * If [offset, offset + bytes) overlaps with the currently cached
+ * block-status region, invalidate the cache.
+ *
+ * (To be used by I/O paths that cause data regions to be zero or
+ * holes.)
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
+/**
+ * Mark the range [offset, offset + bytes) as a data region.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+
+#endif /* BLOCK_INT_IO_H */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 27008cfb22..7d50b6bbd1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -24,1478 +24,9 @@
#ifndef BLOCK_INT_H
#define BLOCK_INT_H
-#include "block/accounting.h"
-#include "block/block.h"
-#include "block/aio-wait.h"
-#include "qemu/queue.h"
-#include "qemu/coroutine.h"
-#include "qemu/stats64.h"
-#include "qemu/timer.h"
-#include "qemu/hbitmap.h"
-#include "block/snapshot.h"
-#include "qemu/throttle.h"
-#include "qemu/rcu.h"
+#include "block_int-global-state.h"
+#include "block_int-io.h"
-#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-
-#define BLOCK_OPT_SIZE "size"
-#define BLOCK_OPT_ENCRYPT "encryption"
-#define BLOCK_OPT_ENCRYPT_FORMAT "encrypt.format"
-#define BLOCK_OPT_COMPAT6 "compat6"
-#define BLOCK_OPT_HWVERSION "hwversion"
-#define BLOCK_OPT_BACKING_FILE "backing_file"
-#define BLOCK_OPT_BACKING_FMT "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE "table_size"
-#define BLOCK_OPT_PREALLOC "preallocation"
-#define BLOCK_OPT_SUBFMT "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL "compat"
-#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
-#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
-#define BLOCK_OPT_REDUNDANCY "redundancy"
-#define BLOCK_OPT_NOCOW "nocow"
-#define BLOCK_OPT_EXTENT_SIZE_HINT "extent_size_hint"
-#define BLOCK_OPT_OBJECT_SIZE "object_size"
-#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits"
-#define BLOCK_OPT_DATA_FILE "data_file"
-#define BLOCK_OPT_DATA_FILE_RAW "data_file_raw"
-#define BLOCK_OPT_COMPRESSION_TYPE "compression_type"
-#define BLOCK_OPT_EXTL2 "extended_l2"
-
-#define BLOCK_PROBE_BUF_SIZE 512
-
-enum BdrvTrackedRequestType {
- BDRV_TRACKED_READ,
- BDRV_TRACKED_WRITE,
- BDRV_TRACKED_DISCARD,
- BDRV_TRACKED_TRUNCATE,
-};
-
-/*
- * That is not quite good that BdrvTrackedRequest structure is public,
- * as block/io.c is very careful about incoming offset/bytes being
- * correct. Be sure to assert bdrv_check_request() succeeded after any
- * modification of BdrvTrackedRequest object out of block/io.c
- */
-typedef struct BdrvTrackedRequest {
- BlockDriverState *bs;
- int64_t offset;
- int64_t bytes;
- enum BdrvTrackedRequestType type;
-
- bool serialising;
- int64_t overlap_offset;
- int64_t overlap_bytes;
-
- QLIST_ENTRY(BdrvTrackedRequest) list;
- Coroutine *co; /* owner, used for deadlock detection */
- CoQueue wait_queue; /* coroutines blocked on this request */
-
- struct BdrvTrackedRequest *waiting_for;
-} BdrvTrackedRequest;
-
-int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- Error **errp);
-int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
-
-struct BlockDriver {
- const char *format_name;
- int instance_size;
-
- /* set to true if the BlockDriver is a block filter. Block filters pass
- * certain callbacks that refer to data (see block.c) to their bs->file
- * or bs->backing (whichever one exists) if the driver doesn't implement
- * them. Drivers that do not wish to forward must implement them and return
- * -ENOTSUP.
- * Note that filters are not allowed to modify data.
- *
- * Filters generally cannot have more than a single filtered child,
- * because the data they present must at all times be the same as
- * that on their filtered child. That would be impossible to
- * achieve for multiple filtered children.
- * (And this filtered child must then be bs->file or bs->backing.)
- */
- bool is_filter;
- /*
- * Set to true if the BlockDriver is a format driver. Format nodes
- * generally do not expect their children to be other format nodes
- * (except for backing files), and so format probing is disabled
- * on those children.
- */
- bool is_format;
- /*
- * Return true if @to_replace can be replaced by a BDS with the
- * same data as @bs without it affecting @bs's behavior (that is,
- * without it being visible to @bs's parents).
- */
- bool (*bdrv_recurse_can_replace)(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
- int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
- int (*bdrv_probe_device)(const char *filename);
-
- /* Any driver implementing this callback is expected to be able to handle
- * NULL file names in its .bdrv_open() implementation */
- void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
- /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
- * this field set to true, except ones that are defined only by their
- * child's bs.
- * An example of the last type will be the quorum block driver.
- */
- bool bdrv_needs_filename;
-
- /*
- * Set if a driver can support backing files. This also implies the
- * following semantics:
- *
- * - Return status 0 of .bdrv_co_block_status means that corresponding
- * blocks are not allocated in this layer of backing-chain
- * - For such (unallocated) blocks, read will:
- * - fill buffer with zeros if there is no backing file
- * - read from the backing file otherwise, where the block layer
- * takes care of reading zeros beyond EOF if backing file is short
- */
- bool supports_backing;
-
- /* For handling image reopen for split or non-split files */
- int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp);
- void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_commit_post)(BDRVReopenState *reopen_state);
- void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
- void (*bdrv_join_options)(QDict *options, QDict *old_options);
-
- int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
-
- /* Protocol drivers should implement this instead of bdrv_open */
- int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
- Error **errp);
- void (*bdrv_close)(BlockDriverState *bs);
-
-
- int coroutine_fn (*bdrv_co_create)(BlockdevCreateOptions *opts,
- Error **errp);
- int coroutine_fn (*bdrv_co_create_opts)(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_amend)(BlockDriverState *bs,
- BlockdevAmendOptions *opts,
- bool force,
- Error **errp);
-
- int (*bdrv_amend_options)(BlockDriverState *bs,
- QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque,
- bool force,
- Error **errp);
-
- int (*bdrv_make_empty)(BlockDriverState *bs);
-
- /*
- * Refreshes the bs->exact_filename field. If that is impossible,
- * bs->exact_filename has to be left empty.
- */
- void (*bdrv_refresh_filename)(BlockDriverState *bs);
-
- /*
- * Gathers the open options for all children into @target.
- * A simple format driver (without backing file support) might
- * implement this function like this:
- *
- * QINCREF(bs->file->bs->full_open_options);
- * qdict_put(target, "file", bs->file->bs->full_open_options);
- *
- * If not specified, the generic implementation will simply put
- * all children's options under their respective name.
- *
- * @backing_overridden is true when bs->backing seems not to be
- * the child that would result from opening bs->backing_file.
- * Therefore, if it is true, the backing child's options should be
- * gathered; otherwise, there is no need since the backing child
- * is the one implied by the image header.
- *
- * Note that ideally this function would not be needed. Every
- * block driver which implements it is probably doing something
- * shady regarding its runtime option structure.
- */
- void (*bdrv_gather_child_options)(BlockDriverState *bs, QDict *target,
- bool backing_overridden);
-
- /*
- * Returns an allocated string which is the directory name of this BDS: It
- * will be used to make relative filenames absolute by prepending this
- * function's return value to them.
- */
- char *(*bdrv_dirname)(BlockDriverState *bs, Error **errp);
-
- /* aio */
- BlockAIOCB *(*bdrv_aio_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
- int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque);
-
- int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
-
- /**
- * @offset: position in bytes to read at
- * @bytes: number of bytes to read
- * @qiov: the buffers to fill with read data
- * @flags: currently unused, always 0
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
- /**
- * @offset: position in bytes to write at
- * @bytes: number of bytes to write
- * @qiov: the buffers containing data to write
- * @flags: zero or more bits allowed by 'supported_write_flags'
- *
- * @offset and @bytes will be a multiple of 'request_alignment',
- * but the length of individual @qiov elements does not have to
- * be a multiple.
- *
- * @bytes will always equal the total size of @qiov, and will be
- * no larger than 'max_transfer'.
- *
- * The buffer in @qiov may point directly to guest memory.
- */
- int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-
- /*
- * Efficiently zero a region of the disk image. Typically an image format
- * would use a compact metadata representation to implement this. This
- * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
- * will be called instead.
- */
- int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
- /* Map [offset, offset + nbytes) range onto a child of @bs to copy from,
- * and invoke bdrv_co_copy_range_from(child, ...), or invoke
- * bdrv_co_copy_range_to() if @bs is the leaf child to copy data from.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_from)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /* Map [offset, offset + nbytes) range onto a child of bs to copy data to,
- * and invoke bdrv_co_copy_range_to(child, src, ...), or perform the copy
- * operation if @bs is the leaf and @src has the same BlockDriver. Return
- * -ENOTSUP if @bs is the leaf but @src has a different BlockDriver.
- *
- * See the comment of bdrv_co_copy_range for the parameter and return value
- * semantics.
- */
- int coroutine_fn (*bdrv_co_copy_range_to)(BlockDriverState *bs,
- BdrvChild *src,
- int64_t src_offset,
- BdrvChild *dst,
- int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
- /*
- * Building block for bdrv_block_status[_above] and
- * bdrv_is_allocated[_above]. The driver should answer only
- * according to the current layer, and should only need to set
- * BDRV_BLOCK_DATA, BDRV_BLOCK_ZERO, BDRV_BLOCK_OFFSET_VALID,
- * and/or BDRV_BLOCK_RAW; if the current layer defers to a backing
- * layer, the result should be 0 (and not BDRV_BLOCK_ZERO). See
- * block.h for the overall meaning of the bits. As a hint, the
- * flag want_zero is true if the caller cares more about precise
- * mappings (favor accurate _OFFSET_VALID/_ZERO) or false for
- * overall allocation (favor larger *pnum, perhaps by reporting
- * _DATA instead of _ZERO). The block layer guarantees input
- * clamped to bdrv_getlength() and aligned to request_alignment,
- * as well as non-NULL pnum, map, and file; in turn, the driver
- * must return an error or set pnum to an aligned non-zero value.
- *
- * Note that @bytes is just a hint on how big of a region the
- * caller wants to inspect. It is not a limit on *pnum.
- * Implementations are free to return larger values of *pnum if
- * doing so does not incur a performance penalty.
- *
- * block/io.c's bdrv_co_block_status() will utilize an unclamped
- * *pnum value for the block-status cache on protocol nodes, prior
- * to clamping *pnum for return to its caller.
- */
- int coroutine_fn (*bdrv_co_block_status)(BlockDriverState *bs,
- bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
- int64_t *map, BlockDriverState **file);
-
- /*
- * This informs the driver that we are no longer interested in the result
- * of in-flight requests, so don't waste the time if possible.
- *
- * One example usage is to avoid waiting for an nbd target node reconnect
- * timeout during job-cancel with force=true.
- */
- void (*bdrv_cancel_in_flight)(BlockDriverState *bs);
-
- /*
- * Invalidate any cached meta-data.
- */
- void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
- Error **errp);
- int (*bdrv_inactivate)(BlockDriverState *bs);
-
- /*
- * Flushes all data for all layers by calling bdrv_co_flush for underlying
- * layers, if needed. This function is needed for deterministic
- * synchronization of the flush finishing callback.
- */
- int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
-
- /* Delete a created file. */
- int coroutine_fn (*bdrv_co_delete_file)(BlockDriverState *bs,
- Error **errp);
-
- /*
- * Flushes all data that was already written to the OS all the way down to
- * the disk (for example file-posix.c calls fsync()).
- */
- int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
-
- /*
- * Flushes all internal caches to the OS. The data may still sit in a
- * writeback cache of the host OS, but it will survive a crash of the qemu
- * process.
- */
- int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
-
- /*
- * Drivers setting this field must be able to work with just a plain
- * filename with '<protocol_name>:' as a prefix, and no other options.
- * Options may be extracted from the filename by implementing
- * bdrv_parse_filename.
- */
- const char *protocol_name;
-
- /*
- * Truncate @bs to @offset bytes using the given @prealloc mode
- * when growing. Modes other than PREALLOC_MODE_OFF should be
- * rejected when shrinking @bs.
- *
- * If @exact is true, @bs must be resized to exactly @offset.
- * Otherwise, it is sufficient for @bs (if it is a host block
- * device and thus there is no way to resize it) to be at least
- * @offset bytes in length.
- *
- * If @exact is true and this function fails but would succeed
- * with @exact = false, it should return -ENOTSUP.
- */
- int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
- bool exact, PreallocMode prealloc,
- BdrvRequestFlags flags, Error **errp);
-
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
- bool has_variable_length;
- int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
- BlockMeasureInfo *(*bdrv_measure)(QemuOpts *opts, BlockDriverState *in_bs,
- Error **errp);
-
- int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov);
- int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
-
- int (*bdrv_snapshot_create)(BlockDriverState *bs,
- QEMUSnapshotInfo *sn_info);
- int (*bdrv_snapshot_goto)(BlockDriverState *bs,
- const char *snapshot_id);
- int (*bdrv_snapshot_delete)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_snapshot_list)(BlockDriverState *bs,
- QEMUSnapshotInfo **psn_info);
- int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
- const char *snapshot_id,
- const char *name,
- Error **errp);
- int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
- ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs,
- Error **errp);
- BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
-
- int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
- int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
-
- int (*bdrv_change_backing_file)(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt);
-
- /* removable device specific */
- bool (*bdrv_is_inserted)(BlockDriverState *bs);
- void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
- void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
-
- /* to control generic scsi devices */
- BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
- int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs,
- unsigned long int req, void *buf);
-
- /* List of options for creating images, terminated by name == NULL */
- QemuOptsList *create_opts;
-
- /* List of options for image amend */
- QemuOptsList *amend_opts;
-
- /*
- * If this driver supports reopening images this contains a
- * NULL-terminated list of the runtime options that can be
- * modified. If an option in this list is unspecified during
- * reopen then it _must_ be reset to its default value or return
- * an error.
- */
- const char *const *mutable_opts;
-
- /*
- * Returns 0 for completed check, -errno for internal errors.
- * The check results are stored in result.
- */
- int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
-
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
-
- /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
- int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
- const char *tag);
- int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
- const char *tag);
- int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
- bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
-
- void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
-
- /*
- * Returns 1 if newly created images are guaranteed to contain only
- * zeros, 0 otherwise.
- */
- int (*bdrv_has_zero_init)(BlockDriverState *bs);
-
- /* Remove fd handlers, timers, and other event loop callbacks so the event
- * loop is no longer in use. Called with no in-flight requests and in
- * depth-first traversal order with parents before child nodes.
- */
- void (*bdrv_detach_aio_context)(BlockDriverState *bs);
-
- /* Add fd handlers, timers, and other event loop callbacks so I/O requests
- * can be processed again. Called with no in-flight requests and in
- * depth-first traversal order with child nodes before parent nodes.
- */
- void (*bdrv_attach_aio_context)(BlockDriverState *bs,
- AioContext *new_context);
-
- /* io queue for linux-aio */
- void (*bdrv_io_plug)(BlockDriverState *bs);
- void (*bdrv_io_unplug)(BlockDriverState *bs);
-
- /**
- * Try to get @bs's logical and physical block size.
- * On success, store them in @bsz and return zero.
- * On failure, return negative errno.
- */
- int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
- /**
- * Try to get @bs's geometry (cyls, heads, sectors)
- * On success, store them in @geo and return 0.
- * On failure return -errno.
- * Only drivers that want to override guest geometry implement this
- * callback; see hd_geometry_guess().
- */
- int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
-
- /**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
- * drain operation to drain and stop any internal sources of requests in
- * the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
- *
- * They should be used by the driver to e.g. manage scheduled I/O
- * requests, or toggle an internal state. After the end of the drain new
- * requests will continue normally.
- */
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
-
- void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
- Error **errp);
- void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
- Error **errp);
-
- /**
- * Informs the block driver that a permission change is intended. The
- * driver checks whether the change is permissible and may take other
- * preparations for the change (e.g. get file system locks). This operation
- * is always followed by a call to either .bdrv_set_perm or
- * .bdrv_abort_perm_update.
- *
- * Checks whether the requested set of cumulative permissions in @perm
- * can be granted for accessing @bs and whether no other users are using
- * permissions other than those given in @shared (both arguments take
- * BLK_PERM_* bitmasks).
- *
- * If both conditions are met, 0 is returned. Otherwise, -errno is returned
- * and errp is set to an error describing the conflict.
- */
- int (*bdrv_check_perm)(BlockDriverState *bs, uint64_t perm,
- uint64_t shared, Error **errp);
-
- /**
- * Called to inform the driver that the cumulative set of used
- * permissions for @bs has changed to @perm, and the set of sharable
- * permissions to @shared. The driver can use this to propagate changes to
- * its children (i.e. request permissions only if a parent actually needs
- * them).
- *
- * This function is only invoked after bdrv_check_perm(), so block drivers
- * may rely on preparations made in their .bdrv_check_perm implementation.
- */
- void (*bdrv_set_perm)(BlockDriverState *bs, uint64_t perm, uint64_t shared);
-
- /*
- * Called to inform the driver that after a previous bdrv_check_perm()
- * call, the permission update is not performed and any preparations made
- * for it (e.g. taken file locks) need to be undone.
- *
- * This function can be called even for nodes that never saw a
- * bdrv_check_perm() call. It is a no-op then.
- */
- void (*bdrv_abort_perm_update)(BlockDriverState *bs);
-
- /**
- * Returns in @nperm and @nshared the permissions that the driver for @bs
- * needs on its child @c, based on the cumulative permissions requested by
- * the parents in @parent_perm and @parent_shared.
- *
- * If @c is NULL, return the permissions for attaching a new child for the
- * given @child_class and @role.
- *
- * If @reopen_queue is non-NULL, don't return the currently needed
- * permissions, but those that will be needed after applying the
- * @reopen_queue.
- */
- void (*bdrv_child_perm)(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role,
- BlockReopenQueue *reopen_queue,
- uint64_t parent_perm, uint64_t parent_shared,
- uint64_t *nperm, uint64_t *nshared);
-
- bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
- bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
- int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- Error **errp);
-
- /**
- * Register/unregister a buffer for I/O. For example, when the driver is
- * interested to know the memory areas that will later be used in iovs, so
- * that it can do IOMMU mapping with VFIO etc., in order to get better
- * performance. In the case of VFIO drivers, this callback is used to do
- * DMA mapping for hot buffers.
- */
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
- QLIST_ENTRY(BlockDriver) list;
-
- /* Pointer to a NULL-terminated array of names of strong options
- * that can be specified for bdrv_open(). A strong option is one
- * that changes the data of a BDS.
- * If this pointer is NULL, the array is considered empty.
- * "filename" and "driver" are always considered strong. */
- const char *const *strong_runtime_opts;
-};
-
-static inline bool block_driver_can_compress(BlockDriver *drv)
-{ /* true iff @drv sets at least one of the two compressed-write callbacks */
- return drv->bdrv_co_pwritev_compressed ||
- drv->bdrv_co_pwritev_compressed_part;
-}
-
-typedef struct BlockLimits { /* I/O size and alignment limits of one node */
- /* Alignment requirement, in bytes, for offset/length of I/O
- * requests. Must be a power of 2 less than INT_MAX; defaults to
- * 1 for drivers with modern byte interfaces, and to 512
- * otherwise. */
- uint32_t request_alignment;
-
- /*
- * Maximum number of bytes that can be discarded at once. Must be multiple
- * of pdiscard_alignment, but need not be power of 2. May be 0 if no
- * inherent 64-bit limit.
- */
- int64_t max_pdiscard;
-
- /* Optimal alignment for discard requests in bytes. A power of 2
- * is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pdiscard if
- * that is set. May be 0 if bl.request_alignment is good enough */
- uint32_t pdiscard_alignment;
-
- /*
- * Maximum number of bytes that can be zeroized at once. Must be multiple of
- * pwrite_zeroes_alignment. 0 means no limit.
- */
- int64_t max_pwrite_zeroes;
-
- /* Optimal alignment for write zeroes requests in bytes. A power
- * of 2 is best but not mandatory. Must be a multiple of
- * bl.request_alignment, and must be less than max_pwrite_zeroes
- * if that is set. May be 0 if bl.request_alignment is good
- * enough */
- uint32_t pwrite_zeroes_alignment;
-
- /* Optimal transfer length in bytes. A power of 2 is best but not
- * mandatory. Must be a multiple of bl.request_alignment, or 0 if
- * no preferred size */
- uint32_t opt_transfer;
-
- /* Maximal transfer length in bytes. Need not be power of 2, but
- * must be multiple of opt_transfer and bl.request_alignment, or 0
- * for no 32-bit limit. For now, anything larger than INT_MAX is
- * clamped down. */
- uint32_t max_transfer;
-
- /* Maximal hardware transfer length in bytes. Applies whenever
- * transfers to the device bypass the kernel I/O scheduler, for
- * example with SG_IO. If larger than max_transfer or if zero,
- * blk_get_max_hw_transfer will fall back to max_transfer.
- */
- uint64_t max_hw_transfer;
-
- /* Maximal number of scatter/gather elements allowed by the hardware.
- * Applies whenever transfers to the device bypass the kernel I/O
- * scheduler, for example with SG_IO. If larger than max_iov
- * or if zero, blk_get_max_hw_iov will fall back to max_iov.
- */
- int max_hw_iov;
-
- /* memory alignment, in bytes, so that no bounce buffer is needed */
- size_t min_mem_alignment;
-
- /* memory alignment, in bytes, for bounce buffer */
- size_t opt_mem_alignment;
-
- /* maximum number of iovec elements */
- int max_iov;
-} BlockLimits;
-
-typedef struct BdrvOpBlocker BdrvOpBlocker;
-
-typedef struct BdrvAioNotifier { /* one entry of BlockDriverState.aio_notifiers */
- void (*attached_aio_context)(AioContext *new_context, void *opaque);
- void (*detach_aio_context)(void *opaque);
-
- void *opaque; /* NOTE(review): presumably the value handed to both callbacks -- confirm */
- bool deleted; /* NOTE(review): presumably defers removal while the list is being walked -- confirm */
-
- QLIST_ENTRY(BdrvAioNotifier) list;
-} BdrvAioNotifier;
-
-struct BdrvChildClass { /* parent-side properties and callbacks for a BdrvChild link */
- /* If true, bdrv_replace_node() doesn't change the node this BdrvChild
- * points to. */
- bool stay_at_node;
-
- /* If true, the parent is a BlockDriverState and bdrv_next_all_states()
- * will return it. This information is used for drain_all, where every node
- * will be drained separately, so the drain only needs to be propagated to
- * non-BDS parents. */
- bool parent_is_bds;
-
- void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
- int *child_flags, QDict *child_options,
- int parent_flags, QDict *parent_options);
-
- void (*change_media)(BdrvChild *child, bool load);
- void (*resize)(BdrvChild *child);
-
- /* Returns a name that is supposedly more useful for human users than the
- * node name for identifying the node in question (in particular, a BB
- * name), or NULL if the parent can't provide a better name. */
- const char *(*get_name)(BdrvChild *child);
-
- /* Returns a malloced string that describes the parent of the child for a
- * human reader. This could be a node-name, BlockBackend name, qdev ID or
- * QOM path of the device owning the BlockBackend, job type and ID etc. The
- * caller is responsible for freeing the memory. */
- char *(*get_parent_desc)(BdrvChild *child);
-
- /*
- * If this pair of functions is implemented, the parent doesn't issue new
- * requests after returning from .drained_begin() until .drained_end() is
- * called.
- *
- * These functions must not change the graph (and therefore also must not
- * call aio_poll(), which could change the graph indirectly).
- *
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
- * Note that this can be nested. If drained_begin() was called twice, new
- * I/O is allowed only after drained_end() was called twice, too.
- */
- void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
-
- /*
- * Returns whether the parent has pending requests for the child. This
- * callback is polled after .drained_begin() has been called until all
- * activity on the child has stopped.
- */
- bool (*drained_poll)(BdrvChild *child);
-
- /* Notifies the parent that the child has been activated/inactivated (e.g.
- * when migration is completing) and it can start/stop requesting
- * permissions and doing I/O on it. */
- void (*activate)(BdrvChild *child, Error **errp);
- int (*inactivate)(BdrvChild *child);
-
- void (*attach)(BdrvChild *child);
- void (*detach)(BdrvChild *child);
-
- /* Notifies the parent that the filename of its child has changed (e.g.
- * because the direct child was removed from the backing chain), so that it
- * can update its reference. */
- int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
- const char *filename, Error **errp);
-
- bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp);
- void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-
- AioContext *(*get_parent_aio_context)(BdrvChild *child);
-};
-
-extern const BdrvChildClass child_of_bds;
-
-struct BdrvChild {
- BlockDriverState *bs; /* the node this BdrvChild points to */
- char *name;
- const BdrvChildClass *klass; /* callbacks supplied by the parent */
- BdrvChildRole role;
- void *opaque;
-
- /**
- * Granted permissions for operating on this BdrvChild (BLK_PERM_* bitmask)
- */
- uint64_t perm;
-
- /**
- * Permissions that can still be granted to other users of @bs while this
- * BdrvChild is still attached to it. (BLK_PERM_* bitmask)
- */
- uint64_t shared_perm;
-
- /*
- * This link is frozen: the child can neither be replaced nor
- * detached from the parent.
- */
- bool frozen;
-
- /*
- * How many times the parent of this child has been drained
- * (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
- * child is entering or leaving a drained section.
- */
- int parent_quiesce_counter;
-
- QLIST_ENTRY(BdrvChild) next; /* NOTE(review): presumably the link in the parent's BlockDriverState.children -- confirm */
- QLIST_ENTRY(BdrvChild) next_parent; /* NOTE(review): presumably the link in bs->parents -- confirm */
-};
-
-/*
- * Allows bdrv_co_block_status() to cache one data region for a
- * protocol node.
- *
- * @rcu: NOTE(review): presumably used to reclaim a replaced cache once RCU
- * readers are done with it -- confirm against the implementation
- * @valid: Whether the cache is valid (should be accessed with atomic
- * functions so this can be reset by RCU readers)
- * @data_start: Offset at which we know (or strongly assume) data begins
- * @data_end: Offset where the data region ends (which is not necessarily
- * the start of a zeroed region)
- */
-typedef struct BdrvBlockStatusCache {
- struct rcu_head rcu;
-
- bool valid;
- int64_t data_start;
- int64_t data_end;
-} BdrvBlockStatusCache;
-
-struct BlockDriverState {
- /* Protected by big QEMU lock or read-only after opening. No special
- * locking needed during I/O...
- */
- int open_flags; /* flags used to open the file, re-used for re-open */
- bool encrypted; /* if true, the media is encrypted */
- bool sg; /* if true, the device is a /dev/sg* */
- bool probed; /* if true, format was probed rather than specified */
- bool force_share; /* if true, always allow all shared permissions */
- bool implicit; /* if true, this filter node was automatically inserted */
-
- BlockDriver *drv; /* NULL means no media */
- void *opaque;
-
- AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
- /* long-running tasks intended to always use the same AioContext as this
- * BDS may register themselves in this list to be notified of changes
- * regarding this BDS's context */
- QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
- bool walking_aio_notifiers; /* to make removal during iteration safe */
-
- char filename[PATH_MAX];
- /*
- * If not empty, this image is a diff in relation to backing_file.
- * Note that this is the name given in the image header and
- * therefore may or may not be equal to .backing->bs->filename.
- * If this field contains a relative path, it is to be resolved
- * relatively to the overlay's location.
- */
- char backing_file[PATH_MAX];
- /*
- * The backing filename indicated by the image header. Contrary
- * to backing_file, if we ever open this file, auto_backing_file
- * is replaced by the resulting BDS's filename (i.e. after a
- * bdrv_refresh_filename() run).
- */
- char auto_backing_file[PATH_MAX];
- char backing_format[16]; /* if non-zero and backing_file exists */
-
- QDict *full_open_options;
- char exact_filename[PATH_MAX];
-
- BdrvChild *backing;
- BdrvChild *file;
-
- /* I/O Limits */
- BlockLimits bl;
-
- /*
- * Flags honored during pread
- */
- unsigned int supported_read_flags;
- /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
- * BDRV_REQ_WRITE_UNCHANGED).
- * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
- * writes will be issued as normal writes without the flag set.
- * This is important to note for drivers that do not explicitly
- * request a WRITE permission for their children and instead take
- * the same permissions as their parent did (this is commonly what
- * block filters do). Such drivers have to be aware that the
- * parent may have taken a WRITE_UNCHANGED permission only and is
- * issuing such requests. Drivers either must make sure that
- * these requests do not result in plain WRITE accesses (usually
- * by supporting BDRV_REQ_WRITE_UNCHANGED, and then forwarding
- * every incoming write request as-is, including potentially that
- * flag), or they have to explicitly take the WRITE permission for
- * their children. */
- unsigned int supported_write_flags;
- /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
- * BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED) */
- unsigned int supported_zero_flags;
- /*
- * Flags honored during truncate (so far: BDRV_REQ_ZERO_WRITE).
- *
- * If BDRV_REQ_ZERO_WRITE is given, the truncate operation must make sure
- * that any added space reads as all zeros. If this can't be guaranteed,
- * the operation must fail.
- */
- unsigned int supported_truncate_flags;
-
- /* the following member gives a name to every node on the bs graph. */
- char node_name[32];
- /* element of the list of named nodes building the graph */
- QTAILQ_ENTRY(BlockDriverState) node_list;
- /* element of the list of all BlockDriverStates (all_bdrv_states) */
- QTAILQ_ENTRY(BlockDriverState) bs_list;
- /* element of the list of monitor-owned BDS */
- QTAILQ_ENTRY(BlockDriverState) monitor_list;
- int refcnt;
-
- /* operation blockers */
- QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];
-
- /* The node that this node inherited default options from (and a reopen on
- * which can affect this node by changing these defaults). This is always a
- * parent node of this node. */
- BlockDriverState *inherits_from;
- QLIST_HEAD(, BdrvChild) children;
- QLIST_HEAD(, BdrvChild) parents;
-
- QDict *options;
- QDict *explicit_options;
- BlockdevDetectZeroesOptions detect_zeroes;
-
- /* The error object in use for blocking operations on backing_hd */
- Error *backing_blocker;
-
- /* Protected by AioContext lock */
-
- /* If we are reading a disk image, give its size in sectors.
- * Generally read-only; it is written to by load_snapshot and
- * save_snapshot, but the block layer is quiescent during those.
- */
- int64_t total_sectors;
-
- /* threshold limit for writes, in bytes. "High water mark". */
- uint64_t write_threshold_offset;
-
- /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
- * Reading from the list can be done with either the BQL or the
- * dirty_bitmap_mutex. Modifying a bitmap only requires
- * dirty_bitmap_mutex. */
- QemuMutex dirty_bitmap_mutex;
- QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
-
- /* Offset after the highest byte written to */
- Stat64 wr_highest_offset;
-
- /* If true, copy read backing sectors into image. Can be >1 if more
- * than one client has requested copy-on-read. Accessed with atomic
- * ops.
- */
- int copy_on_read;
-
- /* number of in-flight requests; overall and serialising.
- * Accessed with atomic ops.
- */
- unsigned int in_flight;
- unsigned int serialising_in_flight;
-
- /* counter for nested bdrv_io_plug.
- * Accessed with atomic ops.
- */
- unsigned io_plugged;
-
- /* do we need to tell the guest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* Accessed with atomic ops. */
- int quiesce_counter;
- int recursive_quiesce_counter;
-
- unsigned int write_gen; /* Current data generation */
-
- /* Protected by reqs_lock. */
- CoMutex reqs_lock;
- QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
- CoQueue flush_queue; /* Serializing flush queue */
- bool active_flush_req; /* Flush request in flight? */
-
- /* Only read/written by whoever has set active_flush_req to true. */
- unsigned int flushed_gen; /* Flushed write generation */
-
- /* BdrvChild links to this node may never be frozen */
- bool never_freeze;
-
- /* Lock for block-status cache RCU writers */
- CoMutex bsc_modify_lock;
- /* Always non-NULL, but must only be dereferenced under an RCU read guard */
- BdrvBlockStatusCache *block_status_cache;
-};
-
-struct BlockBackendRootState { /* NOTE(review): presumably a saved copy of a root node's settings -- confirm usage */
- int open_flags; /* same name and type as BlockDriverState.open_flags */
- BlockdevDetectZeroesOptions detect_zeroes; /* same name and type as BlockDriverState.detect_zeroes */
-};
-
-typedef enum BlockMirrorBackingMode { /* how the mirror target's backing chain is set up */
- /* Reuse the existing backing chain from the source for the target.
- * - sync=full: Set backing BDS to NULL.
- * - sync=top: Use source's backing BDS.
- * - sync=none: Use source as the backing BDS. */
- MIRROR_SOURCE_BACKING_CHAIN,
-
- /* Open the target's backing chain completely anew */
- MIRROR_OPEN_BACKING_CHAIN,
-
- /* Do not change the target's backing BDS after job completion */
- MIRROR_LEAVE_BACKING_CHAIN,
-} BlockMirrorBackingMode;
-
-
-/* Essential block drivers which must always be statically linked into qemu, and
- * which therefore can be accessed without using bdrv_find_format() */
-extern BlockDriver bdrv_file;
-extern BlockDriver bdrv_raw;
-extern BlockDriver bdrv_qcow2;
-
-int coroutine_fn bdrv_co_preadv(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
-
-static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{ /* flat-buffer convenience wrapper around bdrv_co_preadv() */
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); /* I/O vector built from @buf and @bytes */
-
- return bdrv_co_preadv(child, offset, bytes, &qiov, flags); /* result is passed through unchanged */
-}
-
-static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
- int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
-{ /* flat-buffer convenience wrapper around bdrv_co_pwritev() */
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes); /* I/O vector built from @buf and @bytes */
-
- return bdrv_co_pwritev(child, offset, bytes, &qiov, flags); /* result is passed through unchanged */
-}
-
-extern unsigned int bdrv_drain_all_count;
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
-
-bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
- uint64_t align);
-BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
-
-int get_tmp_filename(char *filename, int size);
-BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
- const char *filename);
-
-void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
- QDict *options);
-
-/**
- * bdrv_add_aio_context_notifier:
- *
- * If a long-running job intends to be always run in the same AioContext as a
- * certain BDS, it may use this function to be notified of changes regarding the
- * association of the BDS to an AioContext.
- *
- * attached_aio_context() is called after the target BDS has been attached to a
- * new AioContext; detach_aio_context() is called before the target BDS is being
- * detached from its old AioContext.
- */
-void bdrv_add_aio_context_notifier(BlockDriverState *bs,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-
-/**
- * bdrv_remove_aio_context_notifier:
- *
- * Unsubscribe from change notifications regarding the BDS's AioContext. The
- * parameters given here have to be the same as those given to
- * bdrv_add_aio_context_notifier().
- */
-void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
- void (*aio_context_attached)(AioContext *,
- void *),
- void (*aio_context_detached)(void *),
- void *opaque);
-
-/**
- * bdrv_wakeup:
- * @bs: The BlockDriverState for which an I/O operation has been completed.
- *
- * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
- * synchronous I/O on a BlockDriverState that is attached to another
- * I/O thread, the main thread lets the I/O thread's event loop run,
- * waiting for the I/O operation to complete. A bdrv_wakeup will wake
- * up the main thread if necessary.
- *
- * Manual calls to bdrv_wakeup are rarely necessary, because
- * bdrv_dec_in_flight already calls it.
- */
-void bdrv_wakeup(BlockDriverState *bs);
-
-#ifdef _WIN32
-int is_windows_drive(const char *filename);
-#endif
-
-/**
- * stream_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @base: Block device that will become the new base, or %NULL to
- * flatten the whole backing file chain onto @bs.
- * @backing_file_str: The file name that will be written to @bs as the
- * new backing file if the job completes. Ignored if @base is %NULL.
- * @bottom: NOTE(review): this parameter was not described here; presumably
- * an alternative way to bound the streamed part of the chain -- confirm
- * against the implementation before relying on it.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the stream job inserts into the graph above
- * @bs. NULL means that a node name should be autogenerated.
- * @errp: Error object.
- *
- * Start a streaming operation on @bs. Clusters that are unallocated
- * in @bs, but allocated in any image between @base and @bs (both
- * exclusive) will be written to @bs. At the end of a successful
- * streaming job, the backing file of @bs will be changed to
- * @backing_file_str in the written image and to @base in the live
- * BlockDriverState.
- */
-void stream_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, const char *backing_file_str,
- BlockDriverState *bottom,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error,
- const char *filter_node_name,
- Error **errp);
-
-/**
- * commit_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device.
- * @top: Top block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @backing_file_str: String to use as the backing file in @top's overlay
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @top. NULL means
- * that a node name should be autogenerated.
- * @errp: Error object.
- *
- */
-void commit_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, BlockDriverState *top,
- int creation_flags, int64_t speed,
- BlockdevOnError on_error, const char *backing_file_str,
- const char *filter_node_name, Error **errp);
-/**
- * commit_active_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Active block device to be committed.
- * @base: Block device that will be written into, and become the new top.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @on_error: The action to take upon error.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the commit job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @auto_complete: Auto complete the job.
- * @errp: Error object.
- *
- */
-BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *base, int creation_flags,
- int64_t speed, BlockdevOnError on_error,
- const char *filter_node_name,
- BlockCompletionFunc *cb, void *opaque,
- bool auto_complete, Error **errp);
-/*
- * mirror_start:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @replaces: Block graph node name to replace once the mirror is done. Can
- * only be used when full mirroring is selected.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @granularity: The chosen granularity for the dirty bitmap.
- * @buf_size: The amount of data that can be in flight at one time.
- * @mode: Whether to collapse all images in the chain to the target.
- * @backing_mode: How to establish the target's backing chain after completion.
- * @zero_target: Whether the target should be explicitly zero-initialized
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @unmap: Whether to unmap target where source sectors only contain zeroes.
- * @filter_node_name: The node name that should be assigned to the filter
- * driver that the mirror job inserts into the graph above @bs. NULL means that
- * a node name should be autogenerated.
- * @copy_mode: When to trigger writes to the target.
- * @errp: Error object.
- *
- * Start a mirroring operation on @bs. Clusters that are allocated
- * in @bs will be written to @target until the job is cancelled or
- * manually completed. At the end of a successful mirroring job,
- * @bs will be switched to read from @target.
- */
-void mirror_start(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, const char *replaces,
- int creation_flags, int64_t speed,
- uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
- bool zero_target,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- bool unmap, const char *filter_node_name,
- MirrorCopyMode copy_mode, Error **errp);
-
-/*
- * backup_job_create:
- * @job_id: The id of the newly-created job, or %NULL to use the
- * device name of @bs.
- * @bs: Block device to operate on.
- * @target: Block device to write to.
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @sync_mode: What parts of the disk image should be copied to the destination.
- * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
- * @bitmap_mode: The bitmap synchronization policy to use.
- * @perf: Performance options. All actual fields assumed to be present,
- * all ".has_*" fields are ignored.
- * @on_source_error: The action to take upon error reading from the source.
- * @on_target_error: The action to take upon error writing to the target.
- * @creation_flags: Flags that control the behavior of the Job lifetime.
- * See @BlockJobCreateFlags
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @txn: Transaction that this job is part of (may be NULL).
- *
- * Create a backup operation on @bs. Clusters in @bs are written to @target
- * until the job is cancelled or manually completed.
- */
-BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, int64_t speed,
- MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
- BitmapSyncMode bitmap_mode,
- bool compress,
- const char *filter_node_name,
- BackupPerf *perf,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- int creation_flags,
- BlockCompletionFunc *cb, void *opaque,
- JobTxn *txn, Error **errp);
-
-BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- uint64_t perm, uint64_t shared_perm,
- void *opaque, Error **errp);
-void bdrv_root_unref_child(BdrvChild *child);
-
-void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
- uint64_t *shared_perm);
-
-/**
- * Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
- * bdrv_child_refresh_perms() instead and make the parent's
- * .bdrv_child_perm() implementation return the correct values.
- */
-int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
- Error **errp);
-
-/**
- * Calls bs->drv->bdrv_child_perm() and updates the child's permission
- * masks with the result.
- * Drivers should invoke this function whenever an event occurs that
- * makes their .bdrv_child_perm() implementation return different
- * values than before, but which will not result in the block layer
- * automatically refreshing the permissions.
- */
-int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
-
-bool bdrv_recurse_can_replace(BlockDriverState *bs,
- BlockDriverState *to_replace);
-
-/*
- * Default implementation for BlockDriver.bdrv_child_perm() that can
- * be used by block filters and image formats, as long as they use the
- * child_of_bds child class and set an appropriate BdrvChildRole.
- */
-void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
- BdrvChildRole role, BlockReopenQueue *reopen_queue,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared);
-
-const char *bdrv_get_parent_name(const BlockDriverState *bs);
-void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
-bool blk_dev_has_removable_media(BlockBackend *blk);
-bool blk_dev_has_tray(BlockBackend *blk);
-void blk_dev_eject_request(BlockBackend *blk, bool force);
-bool blk_dev_is_tray_open(BlockBackend *blk);
-bool blk_dev_is_medium_locked(BlockBackend *blk);
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
- const BdrvDirtyBitmap *src,
- HBitmap **backup, bool lock);
-
-void bdrv_inc_in_flight(BlockDriverState *bs);
-void bdrv_dec_in_flight(BlockDriverState *bs);
-
-void blockdev_close_all_bdrv_states(void);
-
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
- BdrvChild *dst, int64_t dst_offset,
- int64_t bytes,
- BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
-
-void bdrv_set_monitor_owned(BlockDriverState *bs);
-BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
-
-/**
- * Simple implementation of bdrv_co_create_opts for protocol drivers
- * which only support creation via opening a file
- * (usually existing raw storage device)
- */
-int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
- const char *filename,
- QemuOpts *opts,
- Error **errp);
-extern QemuOptsList bdrv_create_opts_simple;
-
-BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
- const char *name,
- BlockDriverState **pbs,
- Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
- BlockDirtyBitmapMergeSourceList *bms,
- HBitmap **backup, Error **errp);
-BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
- bool release,
- BlockDriverState **bitmap_bs,
- Error **errp);
-
-BdrvChild *bdrv_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_child(BlockDriverState *bs);
-BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
-BdrvChild *bdrv_primary_child(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
-BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
-
-static inline BlockDriverState *child_bs(BdrvChild *child)
-{
- return child ? child->bs : NULL;
-}
-
-static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_child(bs));
-}
-
-static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_filter_or_cow_child(bs));
-}
-
-static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
-{
- return child_bs(bdrv_primary_child(bs));
-}
-
-/**
- * End all quiescent sections started by bdrv_drain_all_begin(). This is
- * needed when deleting a BDS before bdrv_drain_all_end() is called.
- *
- * NOTE: this is an internal helper for bdrv_close() *only*. No one else
- * should call it.
- */
-void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
-
-/**
- * Check whether the given offset is in the cached block-status data
- * region.
- *
- * If it is, and @pnum is not NULL, *pnum is set to
- * `bsc.data_end - offset`, i.e. how many bytes, starting from
- * @offset, are data (according to the cache).
- * Otherwise, *pnum is not touched.
- */
-bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
-
-/**
- * If [offset, offset + bytes) overlaps with the currently cached
- * block-status region, invalidate the cache.
- *
- * (To be used by I/O paths that cause data regions to be zero or
- * holes.)
- */
-void bdrv_bsc_invalidate_range(BlockDriverState *bs,
- int64_t offset, int64_t bytes);
-
-/**
- * Mark the range [offset, offset + bytes) as a data region.
- */
-void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif /* BLOCK_INT_H */
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 87fbb3985f..6525e16fd5 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -74,6 +74,13 @@ typedef struct BlockJob {
GSList *nodes;
} BlockJob;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_next:
* @job: A block job, or %NULL.
@@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
*/
void block_job_iostatus_reset(BlockJob *job);
+/*
+ * block_job_get_aio_context:
+ *
+ * Returns aio context associated with a block job.
+ */
+AioContext *block_job_get_aio_context(BlockJob *job);
+
+
+/*
+ * Common functions that are neither I/O nor Global State.
+ *
+ * See include/block/block-common.h for more information about
+ * the Common API.
+ */
+
/**
* block_job_is_internal:
* @job: The job to determine if it is user-visible or not.
@@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job);
*/
const BlockJobDriver *block_job_driver(BlockJob *job);
-/*
- * block_job_get_aio_context:
- *
- * Returns aio context associated with a block job.
- */
-AioContext *block_job_get_aio_context(BlockJob *job);
-
#endif
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 6633d83da2..6bd9ae2b20 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -39,6 +39,13 @@ struct BlockJobDriver {
JobDriver job_driver;
/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
* Returns whether the job has pending requests for the child or will
* submit new requests before the next pause point. This callback is polled
* in the context of draining a job node after requesting that the job be
@@ -47,6 +54,13 @@ struct BlockJobDriver {
bool (*drained_poll)(BlockJob *job);
/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
* If the callback is not NULL, it will be invoked before the job is
* resumed in a new AioContext. This is the place to move any resources
* besides job->blk to the new AioContext.
@@ -56,6 +70,13 @@ struct BlockJobDriver {
void (*set_speed)(BlockJob *job, int64_t speed);
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* block_job_create:
* @job_id: The id of the newly-created job, or %NULL to have one
@@ -98,6 +119,13 @@ void block_job_free(Job *job);
*/
void block_job_user_resume(Job *job);
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
/**
* block_job_ratelimit_get_delay:
*
diff --git a/include/block/snapshot.h b/include/block/snapshot.h
index 940345692f..50ff924710 100644
--- a/include/block/snapshot.h
+++ b/include/block/snapshot.h
@@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo {
uint64_t icount; /* record/replay step */
} QEMUSnapshotInfo;
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name);
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
Error **errp);
-/* Group operations. All block drivers are involved.
+/*
+ * Group operations. All block drivers are involved.
* These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers */
+ * when appropriate for appropriate block drivers)
+ */
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);
diff --git a/include/qemu/coroutine-tls.h b/include/qemu/coroutine-tls.h
new file mode 100644
index 0000000000..1558a826aa
--- /dev/null
+++ b/include/qemu/coroutine-tls.h
@@ -0,0 +1,165 @@
+/*
+ * QEMU Thread Local Storage for coroutines
+ *
+ * Copyright Red Hat
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * It is forbidden to access Thread Local Storage in coroutines because
+ * compiler optimizations may cause values to be cached across coroutine
+ * re-entry. Coroutines can run in more than one thread through the course of
+ * their life, leading to bugs when stale TLS values from the wrong thread are
+ * used as a result of compiler optimization.
+ *
+ * An example is:
+ *
+ * .. code-block:: c
+ * :caption: A coroutine that may see the wrong TLS value
+ *
+ * static __thread AioContext *current_aio_context;
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(current_aio_context);
+ * qemu_coroutine_yield();
+ * aio_notify(current_aio_context); // <-- may be stale after yielding!
+ * }
+ *
+ * This header provides macros for safely defining variables in Thread Local
+ * Storage:
+ *
+ * .. code-block:: c
+ * :caption: A coroutine that safely uses TLS
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
+ * ...
+ * static void coroutine_fn foo(void)
+ * {
+ * aio_notify(get_current_aio_context());
+ * qemu_coroutine_yield();
+ * aio_notify(get_current_aio_context()); // <-- safe
+ * }
+ */
+
+#ifndef QEMU_COROUTINE_TLS_H
+#define QEMU_COROUTINE_TLS_H
+
+/*
+ * To stop the compiler from caching TLS values we define accessor functions
+ * with __attribute__((noinline)) plus asm volatile("") to prevent
+ * optimizations that override noinline.
+ *
+ * The compiler can still analyze noinline code and make optimizations based on
+ * that knowledge, so an inline asm output operand is used to prevent
+ * optimizations that make assumptions about the address of the TLS variable.
+ *
+ * This is fragile and ultimately needs to be solved by a mechanism that is
+ * guaranteed to work by the compiler (e.g. stackless coroutines), but for now
+ * we use this approach to prevent issues.
+ */
+
+/**
+ * QEMU_DECLARE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Declare an extern variable in Thread Local Storage from a header file:
+ *
+ * .. code-block:: c
+ * :caption: Declaring an extern variable in Thread Local Storage
+ *
+ * QEMU_DECLARE_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Declaring a TLS variable using __thread
+ *
+ * extern __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DECLARE_CO_TLS(type, var) \
+ __attribute__((noinline)) type get_##var(void); \
+ __attribute__((noinline)) void set_##var(type v); \
+ __attribute__((noinline)) type *get_ptr_##var(void);
+
+/**
+ * QEMU_DEFINE_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a variable in Thread Local Storage that was previously declared from
+ * a header file with QEMU_DECLARE_CO_TLS():
+ *
+ * .. code-block:: c
+ * :caption: Defining a variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_CO_TLS(int, my_count)
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a TLS variable using __thread
+ *
+ * __thread int my_count;
+ */
+#define QEMU_DEFINE_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ type get_##var(void) { asm volatile(""); return co_tls_##var; } \
+ void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+/**
+ * QEMU_DEFINE_STATIC_CO_TLS:
+ * @type: the variable's C type
+ * @var: the variable name
+ *
+ * Define a static variable in Thread Local Storage:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static variable in Thread Local Storage
+ *
+ * QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
+ * ...
+ * int c = get_my_count();
+ * set_my_count(c + 1);
+ * *get_ptr_my_count() = 0;
+ *
+ * This is a coroutine-safe replacement for the __thread keyword and is
+ * equivalent to the following code:
+ *
+ * .. code-block:: c
+ * :caption: Defining a static TLS variable using __thread
+ *
+ * static __thread int my_count;
+ * ...
+ * int c = my_count;
+ * my_count = c + 1;
+ * *(&my_count) = 0;
+ */
+#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \
+ static __thread type co_tls_##var; \
+ static __attribute__((noinline, unused)) \
+ type get_##var(void) \
+ { asm volatile(""); return co_tls_##var; } \
+ static __attribute__((noinline, unused)) \
+ void set_##var(type v) \
+ { asm volatile(""); co_tls_##var = v; } \
+ static __attribute__((noinline, unused)) \
+ type *get_ptr_##var(void) \
+ { type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
+
+#endif /* QEMU_COROUTINE_TLS_H */
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 6e67b6977f..c105b31076 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -169,6 +169,12 @@ typedef struct Job {
* Callbacks and other information about a Job driver.
*/
struct JobDriver {
+
+ /*
+ * These fields are initialized when this object is created,
+ * and are never changed afterwards
+ */
+
/** Derived Job struct size */
size_t instance_size;
@@ -184,9 +190,18 @@ struct JobDriver {
* aborted. If it returns zero, the job moves into the WAITING state. If it
* is the last job to complete in its transaction, all jobs in the
* transaction move from WAITING to PENDING.
+ *
+ * This callback must be run in the job's context.
*/
int coroutine_fn (*run)(Job *job, Error **errp);
+ /*
+ * Functions run without regard to the BQL that may run in any
+ * arbitrary thread. These functions do not need to be thread-safe
+ * because the caller ensures that they are invoked from one
+ * thread at a time.
+ */
+
/**
* If the callback is not NULL, it will be invoked when the job transitions
* into the paused state. Paused jobs must not perform any asynchronous
@@ -201,6 +216,13 @@ struct JobDriver {
*/
void coroutine_fn (*resume)(Job *job);
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
/**
* Called when the job is resumed by the user (i.e. user_paused becomes
* false). .user_resume is called before .resume.
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 8dbc6fcb89..7a4d6a0920 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -242,10 +242,52 @@ AioContext *iohandler_get_aio_context(void);
* must always be taken outside other locks. This function helps
* functions take different paths depending on whether the current
* thread is running within the main loop mutex.
+ *
+ * This function should never be used in the block layer, because
+ * unit tests, block layer tools and qemu-storage-daemon do not
+ * have a BQL.
+ * Please instead refer to qemu_in_main_thread().
*/
bool qemu_mutex_iothread_locked(void);
/**
+ * qemu_in_main_thread: return whether it's possible to safely access
+ * the global state of the block layer.
+ *
+ * Global state of the block layer is not accessible from I/O threads
+ * or worker threads; only from threads that "own" the default
+ * AioContext that qemu_get_aio_context() returns. For tests, block
+ * layer tools and qemu-storage-daemon there is a designated thread that
+ * runs the event loop for qemu_get_aio_context(), and that is the
+ * main thread.
+ *
+ * For emulators, however, any thread that holds the BQL can act
+ * as the block layer main thread; this will be any of the actual
+ * main thread, the vCPU threads or the RCU thread.
+ *
+ * For clarity, do not use this function outside the block layer.
+ */
+bool qemu_in_main_thread(void);
+
+/* Mark and check that the function is part of the global state API. */
+#define GLOBAL_STATE_CODE() \
+ do { \
+ assert(qemu_in_main_thread()); \
+ } while (0)
+
+/* Mark and check that the function is part of the I/O API. */
+#define IO_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/* Mark and check that the function is part of the "I/O OR GS" API. */
+#define IO_OR_GS_CODE() \
+ do { \
+ /* nop */ \
+ } while (0)
+
+/**
* qemu_mutex_lock_iothread: Lock the main loop mutex.
*
* This function locks the main loop mutex. The mutex is taken by
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index e69efbd47f..b063c6fde8 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -29,6 +29,7 @@
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu/sys_membarrier.h"
+#include "qemu/coroutine-tls.h"
#ifdef __cplusplus
extern "C" {
@@ -76,11 +77,11 @@ struct rcu_reader_data {
NotifierList force_rcu;
};
-extern __thread struct rcu_reader_data rcu_reader;
+QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
static inline void rcu_read_lock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
unsigned ctr;
if (p_rcu_reader->depth++ > 0) {
@@ -96,7 +97,7 @@ static inline void rcu_read_lock(void)
static inline void rcu_read_unlock(void)
{
- struct rcu_reader_data *p_rcu_reader = &rcu_reader;
+ struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
assert(p_rcu_reader->depth != 0);
if (--p_rcu_reader->depth > 0) {
diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h
new file mode 100644
index 0000000000..2391679c56
--- /dev/null
+++ b/include/sysemu/block-backend-common.h
@@ -0,0 +1,102 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_COMMON_H
+#define BLOCK_BACKEND_COMMON_H
+
+#include "qemu/iov.h"
+#include "block/throttle-groups.h"
+
+/*
+ * TODO Have to include block/block.h for a bunch of block layer
+ * types. Unfortunately, this pulls in the whole BlockDriverState
+ * API, which we don't want used by many BlockBackend users. Some of
+ * the types belong here, and the rest should be split into a common
+ * header and one for the BlockDriverState API.
+ */
+#include "block/block.h"
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load, Error **errp);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+ /*
+ * Runs when the backend receives a drain request.
+ */
+ void (*drained_begin)(void *opaque);
+ /*
+ * Runs when the backend's last drain request ends.
+ */
+ void (*drained_end)(void *opaque);
+ /*
+ * Is the device still busy?
+ */
+ bool (*drained_poll)(void *opaque);
+} BlockDevOps;
+
+/*
+ * This struct is embedded in (the private) BlockBackend struct and contains
+ * fields that must be public. This is in particular for QLIST_ENTRY() and
+ * friends so that BlockBackends can be kept in lists outside block-backend.c
+ */
+typedef struct BlockBackendPublic {
+ ThrottleGroupMember throttle_group_member;
+} BlockBackendPublic;
+
+#endif /* BLOCK_BACKEND_COMMON_H */
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
new file mode 100644
index 0000000000..2e93a74679
--- /dev/null
+++ b/include/sysemu/block-backend-global-state.h
@@ -0,0 +1,116 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_GS_H
+#define BLOCK_BACKEND_GS_H
+
+#include "block-backend-common.h"
+
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
+BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, Error **errp);
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp);
+int blk_get_refcnt(BlockBackend *blk);
+void blk_ref(BlockBackend *blk);
+void blk_unref(BlockBackend *blk);
+void blk_remove_all_bs(void);
+BlockBackend *blk_by_name(const char *name);
+BlockBackend *blk_next(BlockBackend *blk);
+BlockBackend *blk_all_next(BlockBackend *blk);
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
+void monitor_remove_blk(BlockBackend *blk);
+
+BlockBackendPublic *blk_get_public(BlockBackend *blk);
+BlockBackend *blk_by_public(BlockBackendPublic *public);
+
+void blk_remove_bs(BlockBackend *blk);
+int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
+bool bdrv_has_blk(BlockDriverState *bs);
+bool bdrv_is_root_node(BlockDriverState *bs);
+int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
+ Error **errp);
+void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
+
+void blk_iostatus_enable(BlockBackend *blk);
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
+void blk_iostatus_disable(BlockBackend *blk);
+void blk_iostatus_reset(BlockBackend *blk);
+int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
+void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
+DeviceState *blk_get_attached_dev(BlockBackend *blk);
+BlockBackend *blk_by_dev(void *dev);
+BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
+
+void blk_activate(BlockBackend *blk, Error **errp);
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
+void blk_aio_cancel(BlockAIOCB *acb);
+int blk_commit_all(void);
+void blk_drain(BlockBackend *blk);
+void blk_drain_all(void);
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+bool blk_supports_write_perm(BlockBackend *blk);
+bool blk_is_sg(BlockBackend *blk);
+void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
+int blk_get_flags(BlockBackend *blk);
+bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
+void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
+void blk_op_block_all(BlockBackend *blk, Error *reason);
+void blk_op_unblock_all(BlockBackend *blk, Error *reason);
+int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
+ Error **errp);
+void blk_add_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque);
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *,
+ void *),
+ void (*detach_aio_context)(void *),
+ void *opaque);
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
+bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
+int blk_get_open_flags_from_root_state(BlockBackend *blk);
+
+int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
+ int64_t pos, int size);
+int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
+int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
+int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
+
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
+void blk_io_limits_disable(BlockBackend *blk);
+void blk_io_limits_enable(BlockBackend *blk, const char *group);
+void blk_io_limits_update_group(BlockBackend *blk, const char *group);
+void blk_set_force_allow_inactivate(BlockBackend *blk);
+
+void blk_register_buf(BlockBackend *blk, void *host, size_t size);
+void blk_unregister_buf(BlockBackend *blk, void *host);
+
+const BdrvChild *blk_root(BlockBackend *blk);
+
+int blk_make_empty(BlockBackend *blk, Error **errp);
+
+#endif /* BLOCK_BACKEND_GS_H */
diff --git a/include/sysemu/block-backend-io.h b/include/sysemu/block-backend-io.h
new file mode 100644
index 0000000000..6517c39295
--- /dev/null
+++ b/include/sysemu/block-backend-io.h
@@ -0,0 +1,161 @@
+/*
+ * QEMU Block backends
+ *
+ * Copyright (C) 2014-2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BLOCK_BACKEND_IO_H
+#define BLOCK_BACKEND_IO_H
+
+#include "block-backend-common.h"
+
+/*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
+const char *blk_name(const BlockBackend *blk);
+
+BlockDriverState *blk_bs(BlockBackend *blk);
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
+void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
+bool blk_iostatus_is_enabled(const BlockBackend *blk);
+
+char *blk_get_attached_dev_id(BlockBackend *blk);
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
+ BlockCompletionFunc *cb, void *opaque);
+void blk_aio_cancel_async(BlockAIOCB *acb);
+BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
+ BlockCompletionFunc *cb, void *opaque);
+
+void blk_inc_in_flight(BlockBackend *blk);
+void blk_dec_in_flight(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
+void blk_lock_medium(BlockBackend *blk, bool locked);
+void blk_eject(BlockBackend *blk, bool eject_flag);
+int64_t blk_getlength(BlockBackend *blk);
+void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
+int64_t blk_nb_sectors(BlockBackend *blk);
+void *blk_try_blockalign(BlockBackend *blk, size_t size);
+void *blk_blockalign(BlockBackend *blk, size_t size);
+bool blk_is_writable(BlockBackend *blk);
+bool blk_enable_write_cache(BlockBackend *blk);
+BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
+BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
+ int error);
+void blk_error_action(BlockBackend *blk, BlockErrorAction action,
+ bool is_read, int error);
+void blk_iostatus_set_err(BlockBackend *blk, int error);
+int blk_get_max_iov(BlockBackend *blk);
+int blk_get_max_hw_iov(BlockBackend *blk);
+void blk_set_guest_block_size(BlockBackend *blk, int align);
+
+void blk_io_plug(BlockBackend *blk);
+void blk_io_unplug(BlockBackend *blk);
+AioContext *blk_get_aio_context(BlockBackend *blk);
+BlockAcctStats *blk_get_stats(BlockBackend *blk);
+void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque);
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret);
+
+uint32_t blk_get_request_alignment(BlockBackend *blk);
+uint32_t blk_get_max_transfer(BlockBackend *blk);
+uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
+
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+ BlockBackend *blk_out, int64_t off_out,
+ int64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
+
+
+/*
+ * "I/O or GS" API functions. These functions can run without
+ * the BQL, but only in one specific iothread/main loop.
+ *
+ * See include/block/block-io.h for more information about
+ * the "I/O or GS" API.
+ */
+
+int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+ int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
+static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
+ int64_t bytes, void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_preadv(blk, offset, bytes, &qiov, flags);
+}
+
+static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
+ int64_t bytes, void *buf,
+ BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
+ IO_OR_GS_CODE();
+
+ assert(bytes <= SIZE_MAX);
+
+ return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
+ int64_t bytes);
+
+int coroutine_fn blk_co_flush(BlockBackend *blk);
+int blk_flush(BlockBackend *blk);
+
+int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
+
+int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
+ int64_t bytes);
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int64_t bytes, BdrvRequestFlags flags);
+int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
+
+#endif /* BLOCK_BACKEND_IO_H */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index e5e1524f06..038be9fc40 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -13,272 +13,9 @@
#ifndef BLOCK_BACKEND_H
#define BLOCK_BACKEND_H
-#include "qemu/iov.h"
-#include "block/throttle-groups.h"
+#include "block-backend-global-state.h"
+#include "block-backend-io.h"
-/*
- * TODO Have to include block/block.h for a bunch of block layer
- * types. Unfortunately, this pulls in the whole BlockDriverState
- * API, which we don't want used by many BlockBackend users. Some of
- * the types belong here, and the rest should be split into a common
- * header and one for the BlockDriverState API.
- */
-#include "block/block.h"
-
-/* Callbacks for block device models */
-typedef struct BlockDevOps {
- /*
- * Runs when virtual media changed (monitor commands eject, change)
- * Argument load is true on load and false on eject.
- * Beware: doesn't run when a host device's physical media
- * changes. Sure would be useful if it did.
- * Device models with removable media must implement this callback.
- */
- void (*change_media_cb)(void *opaque, bool load, Error **errp);
- /*
- * Runs when an eject request is issued from the monitor, the tray
- * is closed, and the medium is locked.
- * Device models that do not implement is_medium_locked will not need
- * this callback. Device models that can lock the medium or tray might
- * want to implement the callback and unlock the tray when "force" is
- * true, even if they do not support eject requests.
- */
- void (*eject_request_cb)(void *opaque, bool force);
- /*
- * Is the virtual tray open?
- * Device models implement this only when the device has a tray.
- */
- bool (*is_tray_open)(void *opaque);
- /*
- * Is the virtual medium locked into the device?
- * Device models implement this only when device has such a lock.
- */
- bool (*is_medium_locked)(void *opaque);
- /*
- * Runs when the size changed (e.g. monitor command block_resize)
- */
- void (*resize_cb)(void *opaque);
- /*
- * Runs when the backend receives a drain request.
- */
- void (*drained_begin)(void *opaque);
- /*
- * Runs when the backend's last drain request ends.
- */
- void (*drained_end)(void *opaque);
- /*
- * Is the device still busy?
- */
- bool (*drained_poll)(void *opaque);
-} BlockDevOps;
-
-/* This struct is embedded in (the private) BlockBackend struct and contains
- * fields that must be public. This is in particular for QLIST_ENTRY() and
- * friends so that BlockBackends can be kept in lists outside block-backend.c
- * */
-typedef struct BlockBackendPublic {
- ThrottleGroupMember throttle_group_member;
-} BlockBackendPublic;
-
-BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
-BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
- uint64_t shared_perm, Error **errp);
-BlockBackend *blk_new_open(const char *filename, const char *reference,
- QDict *options, int flags, Error **errp);
-int blk_get_refcnt(BlockBackend *blk);
-void blk_ref(BlockBackend *blk);
-void blk_unref(BlockBackend *blk);
-void blk_remove_all_bs(void);
-const char *blk_name(const BlockBackend *blk);
-BlockBackend *blk_by_name(const char *name);
-BlockBackend *blk_next(BlockBackend *blk);
-BlockBackend *blk_all_next(BlockBackend *blk);
-bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
-void monitor_remove_blk(BlockBackend *blk);
-
-BlockBackendPublic *blk_get_public(BlockBackend *blk);
-BlockBackend *blk_by_public(BlockBackendPublic *public);
-
-BlockDriverState *blk_bs(BlockBackend *blk);
-void blk_remove_bs(BlockBackend *blk);
-int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
-int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
-bool bdrv_has_blk(BlockDriverState *bs);
-bool bdrv_is_root_node(BlockDriverState *bs);
-int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
- Error **errp);
-void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
-
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
-void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
-void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
-void blk_iostatus_enable(BlockBackend *blk);
-bool blk_iostatus_is_enabled(const BlockBackend *blk);
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
-void blk_iostatus_disable(BlockBackend *blk);
-void blk_iostatus_reset(BlockBackend *blk);
-void blk_iostatus_set_err(BlockBackend *blk, int error);
-int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
-void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
-DeviceState *blk_get_attached_dev(BlockBackend *blk);
-char *blk_get_attached_dev_id(BlockBackend *blk);
-BlockBackend *blk_by_dev(void *dev);
-BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
- int64_t bytes,
- QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags);
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- int64_t bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-
-static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_preadv(blk, offset, bytes, &qiov, flags);
-}
-
-static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
- int64_t bytes, void *buf,
- BdrvRequestFlags flags)
-{
- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-
- assert(bytes <= SIZE_MAX);
-
- return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
-}
-
-int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
- BdrvRequestFlags flags);
-int64_t blk_getlength(BlockBackend *blk);
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
-int64_t blk_nb_sectors(BlockBackend *blk);
-BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
- QEMUIOVector *qiov, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
- BlockCompletionFunc *cb, void *opaque);
-void blk_aio_cancel(BlockAIOCB *acb);
-void blk_aio_cancel_async(BlockAIOCB *acb);
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
- int64_t bytes);
-int coroutine_fn blk_co_flush(BlockBackend *blk);
-int blk_flush(BlockBackend *blk);
-int blk_commit_all(void);
-void blk_inc_in_flight(BlockBackend *blk);
-void blk_dec_in_flight(BlockBackend *blk);
-void blk_drain(BlockBackend *blk);
-void blk_drain_all(void);
-void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
- int error);
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
- bool is_read, int error);
-bool blk_supports_write_perm(BlockBackend *blk);
-bool blk_is_writable(BlockBackend *blk);
-bool blk_is_sg(BlockBackend *blk);
-bool blk_enable_write_cache(BlockBackend *blk);
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
-void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-bool blk_is_inserted(BlockBackend *blk);
-bool blk_is_available(BlockBackend *blk);
-void blk_lock_medium(BlockBackend *blk, bool locked);
-void blk_eject(BlockBackend *blk, bool eject_flag);
-int blk_get_flags(BlockBackend *blk);
-uint32_t blk_get_request_alignment(BlockBackend *blk);
-uint32_t blk_get_max_transfer(BlockBackend *blk);
-uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
-int blk_get_max_iov(BlockBackend *blk);
-int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
-void *blk_try_blockalign(BlockBackend *blk, size_t size);
-void *blk_blockalign(BlockBackend *blk, size_t size);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
-void blk_op_block_all(BlockBackend *blk, Error *reason);
-void blk_op_unblock_all(BlockBackend *blk, Error *reason);
-AioContext *blk_get_aio_context(BlockBackend *blk);
-int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
- Error **errp);
-void blk_add_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *new_context, void *opaque),
- void (*detach_aio_context)(void *opaque), void *opaque);
-void blk_remove_aio_context_notifier(BlockBackend *blk,
- void (*attached_aio_context)(AioContext *,
- void *),
- void (*detach_aio_context)(void *),
- void *opaque);
-void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
-void blk_io_plug(BlockBackend *blk);
-void blk_io_unplug(BlockBackend *blk);
-BlockAcctStats *blk_get_stats(BlockBackend *blk);
-BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
-void blk_update_root_state(BlockBackend *blk);
-bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
-int blk_get_open_flags_from_root_state(BlockBackend *blk);
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque);
-int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
- int64_t bytes, BdrvRequestFlags flags);
-int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
- int64_t bytes);
-int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
- PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
- int64_t pos, int size);
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
-BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
- BlockCompletionFunc *cb,
- void *opaque, int ret);
-
-void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
-void blk_io_limits_disable(BlockBackend *blk);
-void blk_io_limits_enable(BlockBackend *blk, const char *group);
-void blk_io_limits_update_group(BlockBackend *blk, const char *group);
-void blk_set_force_allow_inactivate(BlockBackend *blk);
-
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
-
-int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
- BlockBackend *blk_out, int64_t off_out,
- int64_t bytes, BdrvRequestFlags read_flags,
- BdrvRequestFlags write_flags);
-
-const BdrvChild *blk_root(BlockBackend *blk);
-
-int blk_make_empty(BlockBackend *blk, Error **errp);
+/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index f9fb54d437..3211b16513 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -13,9 +13,6 @@
#include "block/block.h"
#include "qemu/queue.h"
-void blockdev_mark_auto_del(BlockBackend *blk);
-void blockdev_auto_del(BlockBackend *blk);
-
typedef enum {
IF_DEFAULT = -1, /* for use with drive_add() only */
/*
@@ -38,6 +35,16 @@ struct DriveInfo {
QTAILQ_ENTRY(DriveInfo) next;
};
+/*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
+
+void blockdev_mark_auto_del(BlockBackend *blk);
+void blockdev_auto_del(BlockBackend *blk);
+
DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);
diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index 2edf33658a..dd64fb401d 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -55,6 +55,7 @@ int os_mlock(void);
typedef struct timeval qemu_timeval;
#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+int os_set_daemonize(bool d);
bool is_daemonized(void);
/**
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 43f569b5c2..770752222a 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -77,6 +77,14 @@ typedef struct {
} qemu_timeval;
int qemu_gettimeofday(qemu_timeval *tp);
+static inline int os_set_daemonize(bool d)
+{
+ if (d) {
+ return -ENOTSUP;
+ }
+ return 0;
+}
+
static inline bool is_daemonized(void)
{
return false;
diff --git a/job.c b/job.c
index 54db80df66..075c6f3a20 100644
--- a/job.c
+++ b/job.c
@@ -381,6 +381,8 @@ void job_ref(Job *job)
void job_unref(Job *job)
{
+ GLOBAL_STATE_CODE();
+
if (--job->refcnt == 0) {
assert(job->status == JOB_STATUS_NULL);
assert(!timer_pending(&job->sleep_timer));
@@ -602,6 +604,7 @@ bool job_user_paused(Job *job)
void job_user_resume(Job *job, Error **errp)
{
assert(job);
+ GLOBAL_STATE_CODE();
if (!job->user_paused || job->pause_count <= 0) {
error_setg(errp, "Can't resume a job that was not paused");
return;
@@ -672,6 +675,7 @@ static void job_update_rc(Job *job)
static void job_commit(Job *job)
{
assert(!job->ret);
+ GLOBAL_STATE_CODE();
if (job->driver->commit) {
job->driver->commit(job);
}
@@ -680,6 +684,7 @@ static void job_commit(Job *job)
static void job_abort(Job *job)
{
assert(job->ret);
+ GLOBAL_STATE_CODE();
if (job->driver->abort) {
job->driver->abort(job);
}
@@ -687,6 +692,7 @@ static void job_abort(Job *job)
static void job_clean(Job *job)
{
+ GLOBAL_STATE_CODE();
if (job->driver->clean) {
job->driver->clean(job);
}
@@ -726,6 +732,7 @@ static int job_finalize_single(Job *job)
static void job_cancel_async(Job *job, bool force)
{
+ GLOBAL_STATE_CODE();
if (job->driver->cancel) {
force = job->driver->cancel(job, force);
} else {
@@ -825,6 +832,7 @@ static void job_completed_txn_abort(Job *job)
static int job_prepare(Job *job)
{
+ GLOBAL_STATE_CODE();
if (job->ret == 0 && job->driver->prepare) {
job->ret = job->driver->prepare(job);
job_update_rc(job);
@@ -952,6 +960,7 @@ static void coroutine_fn job_co_entry(void *opaque)
Job *job = opaque;
assert(job && job->driver && job->driver->run);
+ assert(job->aio_context == qemu_get_current_aio_context());
job_pause_point(job);
job->ret = job->driver->run(job, &job->err);
job->deferred_to_main_loop = true;
@@ -1054,6 +1063,7 @@ void job_complete(Job *job, Error **errp)
{
/* Should not be reachable via external interface for internal jobs */
assert(job->id);
+ GLOBAL_STATE_CODE();
if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
return;
}
diff --git a/migration/block.c b/migration/block.c
index a950977855..077a413325 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -932,7 +932,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
}
- blk_invalidate_cache(blk, &local_err);
+ blk_activate(blk, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;
diff --git a/migration/migration.c b/migration/migration.c
index 9cc344514b..695f0f2900 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -503,9 +503,9 @@ static void process_incoming_migration_bh(void *opaque)
if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
global_state_get_runstate() == RUN_STATE_RUNNING))) {
- /* Make sure all file formats flush their mutable metadata.
+ /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@@ -591,8 +591,8 @@ static void process_incoming_migration_co(void *opaque)
/* we get COLO info, and know if we are in COLO mode */
if (!ret && migration_incoming_colo_enabled()) {
- /* Make sure all file formats flush their mutable metadata */
- bdrv_invalidate_cache_all(&local_err);
+ /* Make sure all file formats throw away their mutable metadata */
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
goto fail;
@@ -1932,7 +1932,7 @@ static void migrate_fd_cancel(MigrationState *s)
if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
@@ -3111,7 +3111,7 @@ fail:
*/
Error *local_err = NULL;
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
}
@@ -3256,7 +3256,7 @@ fail_invalidate:
Error *local_err = NULL;
qemu_mutex_lock_iothread();
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
diff --git a/migration/savevm.c b/migration/savevm.c
index 967ff80547..02ed94c180 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1438,7 +1438,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
if (inactivate_disks) {
/* Inactivate before sending QEMU_VM_EOF so that the
- * bdrv_invalidate_cache_all() on the other end won't fail. */
+ * bdrv_activate_all() on the other end won't fail. */
ret = bdrv_inactivate_all();
if (ret) {
error_report("%s: bdrv_inactivate_all() failed (%d)",
@@ -2013,9 +2013,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_bh("after announce");
- /* Make sure all file formats flush their mutable metadata.
+ /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@@ -2808,6 +2808,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
g_autoptr(GDateTime) now = g_date_time_new_now_local();
AioContext *aio_context;
+ GLOBAL_STATE_CODE();
+
if (migration_is_blocked(errp)) {
return false;
}
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index df97582dd4..ad82c275c4 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -144,7 +144,7 @@ void qmp_cont(Error **errp)
* If there are no inactive block nodes (e.g. because the VM was just
* paused rather than completing a migration), bdrv_activate_all() simply
* doesn't do anything. */
- bdrv_invalidate_cache_all(&local_err);
+ bdrv_activate_all(&local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/os-posix.c b/os-posix.c
index ae6c9f2a5e..24692c8593 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -317,6 +317,12 @@ bool is_daemonized(void)
return daemonize;
}
+int os_set_daemonize(bool d)
+{
+ daemonize = d;
+ return 0;
+}
+
int os_mlock(void)
{
#ifdef HAVE_MLOCKALL
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 035395ae13..1681844b61 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "monitor/monitor.h"
+#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
@@ -473,11 +474,16 @@ bool qemu_in_vcpu_thread(void)
return current_cpu && qemu_cpu_is_self(current_cpu);
}
-static __thread bool iothread_locked = false;
+QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
bool qemu_mutex_iothread_locked(void)
{
- return iothread_locked;
+ return get_iothread_locked();
+}
+
+bool qemu_in_main_thread(void)
+{
+ return qemu_mutex_iothread_locked();
}
/*
@@ -490,13 +496,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line)
g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
- iothread_locked = true;
+ set_iothread_locked(true);
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
- iothread_locked = false;
+ set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index a0df820b9d..fe6cf268ff 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -973,6 +973,8 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
DeviceState *dev;
BlockBackend *blk;
+ GLOBAL_STATE_CODE();
+
dev = find_device_state(id, errp);
if (dev == NULL) {
return NULL;
diff --git a/storage-daemon/qemu-storage-daemon.c b/storage-daemon/qemu-storage-daemon.c
index 504d33aa91..dd18b2cde8 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -93,6 +93,9 @@ static void help(void)
" --chardev <options> configure a character device backend\n"
" (see the qemu(1) man page for possible options)\n"
"\n"
+" --daemonize daemonize the process, and have the parent exit\n"
+" once startup is complete\n"
+"\n"
" --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>]\n"
" [,writable=on|off][,bitmap=<name>]\n"
" export the specified block node over NBD\n"
@@ -144,6 +147,7 @@ QEMU_HELP_BOTTOM "\n",
enum {
OPTION_BLOCKDEV = 256,
OPTION_CHARDEV,
+ OPTION_DAEMONIZE,
OPTION_EXPORT,
OPTION_MONITOR,
OPTION_NBD_SERVER,
@@ -177,13 +181,30 @@ static int getopt_set_loc(int argc, char **argv, const char *optstring,
return c;
}
-static void process_options(int argc, char *argv[])
+/**
+ * Process QSD command-line arguments.
+ *
+ * This is done in two passes:
+ *
+ * First (@pre_init_pass is true), we do a pass where all global
+ * arguments pertaining to the QSD process (like --help or --daemonize)
+ * are processed. This pass is done before most of the QEMU-specific
+ * initialization steps (e.g. initializing the block layer or QMP), and
+ * so must only process arguments that are not really QEMU-specific.
+ *
+ * Second (@pre_init_pass is false), we (sequentially) process all
+ * QEMU/QSD-specific arguments. Many of these arguments are effectively
+ * translated to QMP commands (like --blockdev for blockdev-add, or
+ * --export for block-export-add).
+ */
+static void process_options(int argc, char *argv[], bool pre_init_pass)
{
int c;
static const struct option long_options[] = {
{"blockdev", required_argument, NULL, OPTION_BLOCKDEV},
{"chardev", required_argument, NULL, OPTION_CHARDEV},
+ {"daemonize", no_argument, NULL, OPTION_DAEMONIZE},
{"export", required_argument, NULL, OPTION_EXPORT},
{"help", no_argument, NULL, 'h'},
{"monitor", required_argument, NULL, OPTION_MONITOR},
@@ -196,11 +217,27 @@ static void process_options(int argc, char *argv[])
};
/*
- * In contrast to the system emulator, options are processed in the order
- * they are given on the command lines. This means that things must be
- * defined first before they can be referenced in another option.
+ * In contrast to the system emulator, QEMU-specific options are processed
+ * in the order they are given on the command line. This means that things
+ * must be defined first before they can be referenced in another option.
*/
+ optind = 1;
while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) {
+ bool handle_option_pre_init;
+
+ /* Should this argument be processed in the pre-init pass? */
+ handle_option_pre_init =
+ c == '?' ||
+ c == 'h' ||
+ c == 'V' ||
+ c == OPTION_DAEMONIZE ||
+ c == OPTION_PIDFILE;
+
+ /* Process every option only in its respective pass */
+ if (pre_init_pass != handle_option_pre_init) {
+ continue;
+ }
+
switch (c) {
case '?':
exit(EXIT_FAILURE);
@@ -246,6 +283,12 @@ static void process_options(int argc, char *argv[])
qemu_opts_del(opts);
break;
}
+ case OPTION_DAEMONIZE:
+ if (os_set_daemonize(true) < 0) {
+ error_report("--daemonize not supported in this build");
+ exit(EXIT_FAILURE);
+ }
+ break;
case OPTION_EXPORT:
{
Visitor *v;
@@ -334,6 +377,10 @@ int main(int argc, char *argv[])
qemu_init_exec_dir(argv[0]);
os_setup_signal_handling();
+ process_options(argc, argv, true);
+
+ os_daemonize();
+
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_TRACE);
qemu_add_opts(&qemu_trace_opts);
@@ -348,7 +395,7 @@ int main(int argc, char *argv[])
qemu_set_log(LOG_TRACE);
qemu_init_main_loop(&error_fatal);
- process_options(argc, argv);
+ process_options(argc, argv, false);
/*
* Write the pid file after creating chardevs, exports, and NBD servers but
@@ -356,6 +403,7 @@ int main(int argc, char *argv[])
* it.
*/
pid_file_init();
+ os_setup_post();
while (!exit_requested) {
main_loop_wait(false);
diff --git a/stubs/iothread-lock-block.c b/stubs/iothread-lock-block.c
new file mode 100644
index 0000000000..c88ed70462
--- /dev/null
+++ b/stubs/iothread-lock-block.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+
+bool qemu_in_main_thread(void)
+{
+ return qemu_get_current_aio_context() == qemu_get_aio_context();
+}
+
diff --git a/stubs/meson.build b/stubs/meson.build
index d359cbe1ad..6f80fec761 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -17,6 +17,9 @@ if linux_io_uring.found()
stub_ss.add(files('io_uring.c'))
endif
stub_ss.add(files('iothread-lock.c'))
+if have_block
+ stub_ss.add(files('iothread-lock-block.c'))
+endif
stub_ss.add(files('isa-bus.c'))
stub_ss.add(files('is-daemonized.c'))
if libaio.found()
diff --git a/tests/check-block.sh b/tests/check-block.sh
index 18f7433901..f59496396c 100755
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@@ -48,18 +48,6 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then
skip "bash version too old ==> Not running the qemu-iotests."
fi
-if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then
- if ! command -v gsed >/dev/null 2>&1; then
- skip "GNU sed not available ==> Not running the qemu-iotests."
- fi
-else
- # Double-check that we're not using BusyBox' sed which says
- # that "This is not GNU sed version 4.0" ...
- if sed --version | grep -q 'not GNU sed' ; then
- skip "BusyBox sed not supported ==> Not running the qemu-iotests."
- fi
-fi
-
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index f2ec5c5ceb..8b1143dc16 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -33,6 +33,12 @@ _cleanup()
_rm_test_img "${TEST_IMG}.copy"
_cleanup_test_img
_cleanup_qemu
+
+ if [ -f "$TEST_DIR/qsd.pid" ]; then
+ kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")"
+ rm -f "$TEST_DIR/qsd.pid"
+ fi
+ rm -f "$SOCK_DIR/qsd.sock"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
@@ -45,7 +51,7 @@ _supported_fmt qcow2
_supported_proto file
_supported_os Linux
-size=64M
+size=$((64 * 1048576))
TEST_IMG="${TEST_IMG}.base" _make_test_img $size
echo
@@ -216,6 +222,188 @@ wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
_check_test_img
+echo
+echo === Start mirror to throttled QSD and exit qemu ===
+echo
+
+# Mirror to a throttled QSD instance (so that qemu cannot drain the
+# throttling), wait for READY, then write some data to the device,
+# and then quit qemu.
+# (qemu should force-cancel the job and not wait for the data to be
+# written to the target.)
+
+_make_test_img $size
+
+# Will be used by this and the next case
+set_up_throttled_qsd() {
+ $QSD \
+ --object throttle-group,id=thrgr,limits.bps-total=1048576 \
+ --blockdev null-co,node-name=null,size=$size \
+ --blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \
+ --nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \
+ --export nbd,id=exp,node-name=throttled,name=target,writable=true \
+ --pidfile "$TEST_DIR/qsd.pid" \
+ --daemonize
+}
+
+set_up_throttled_qsd
+
+# Need a virtio-blk device so that qemu-io writes will not block the monitor
+_launch_qemu \
+ --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
+ --blockdev qcow2,node-name=source-fmt,file=source-proto \
+ --device virtio-blk,id=vblk,drive=source-fmt \
+ --blockdev "{\"driver\": \"nbd\",
+ \"node-name\": \"target\",
+ \"server\": {
+ \"type\": \"unix\",
+ \"path\": \"$SOCK_DIR/qsd.sock\"
+ },
+ \"export\": \"target\"}"
+
+h=$QEMU_HANDLE
+_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
+
+# Use sync=top, so the first pass will not copy the whole image
+_send_qemu_cmd $h \
+ '{"execute": "blockdev-mirror",
+ "arguments": {
+ "job-id": "mirror",
+ "device": "source-fmt",
+ "target": "target",
+ "sync": "top"
+ }}' \
+ 'return' \
+ | grep -v JOB_STATUS_CHANGE # Ignore these events during creation
+
+# This too will be used by this and the next case
+# $1: QEMU handle
+# $2: Image size
+wait_for_job_and_quit() {
+ h=$1
+ size=$2
+
+ # List of expected events
+ capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE'
+ _wait_event $h 'BLOCK_JOB_READY'
+ QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY
+
+ # Write something to the device for post-READY mirroring. Write it in
+ # blocks matching the cluster size, each spaced one block apart, so
+ # that the mirror job will have to spawn one request per cluster.
+ # Because the number of concurrent requests is limited (to 16), this
+ # limits the number of bytes concurrently in flight, which speeds up
+ # cancelling the job (in-flight requests are still waited for).
+ # To limit the number of bytes in flight, we could alternatively pass
+ # something for blockdev-mirror's @buf-size parameter, but
+ # block-commit does not have such a parameter, so we need to figure
+ # something out that works for both.
+
+ cluster_size=65536
+ step=$((cluster_size * 2))
+
+ echo '--- Writing data to the virtio-blk device ---'
+
+ for ofs in $(seq 0 $step $((size - step))); do
+ qemu_io_cmd="qemu-io -d vblk/virtio-backend "
+ qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\""
+
+ # Do not include these requests in the reference output
+ # (it's just too much)
+ silent=yes _send_qemu_cmd $h \
+ "{\"execute\": \"human-monitor-command\",
+ \"arguments\": {
+ \"command-line\": \"$qemu_io_cmd\"
+ }}" \
+ 'return'
+ done
+
+ # Wait until the job's length is updated to reflect the write requests
+
+ # We have written to half of the device, so this is the expected job length
+ final_len=$((size / 2))
+ timeout=100 # unit: 0.1 seconds
+ while true; do
+ len=$(
+ _send_qemu_cmd $h \
+ '{"execute": "query-block-jobs"}' \
+ 'return.*"len": [0-9]\+' \
+ | grep 'return.*"len": [0-9]\+' \
+ | sed -e 's/.*"len": \([0-9]\+\).*/\1/'
+ )
+ if [ "$len" -eq "$final_len" ]; then
+ break
+ fi
+ timeout=$((timeout - 1))
+ if [ "$timeout" -eq 0 ]; then
+ echo "ERROR: Timeout waiting for job to reach len=$final_len"
+ break
+ fi
+ sleep 0.1
+ done
+
+ sleep 1
+
+ _send_qemu_cmd $h \
+ '{"execute": "quit"}' \
+ 'return'
+
+ # List of expected events
+ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN'
+ _wait_event $h 'SHUTDOWN'
+ QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN
+ _wait_event $h 'JOB_STATUS_CHANGE' # standby
+ _wait_event $h 'JOB_STATUS_CHANGE' # ready
+ _wait_event $h 'JOB_STATUS_CHANGE' # aborting
+ # Filter the offset (depends on when exactly `quit` was issued)
+ _wait_event $h 'BLOCK_JOB_CANCELLED' \
+ | sed -e 's/"offset": [0-9]\+/"offset": (filtered)/'
+ _wait_event $h 'JOB_STATUS_CHANGE' # concluded
+ _wait_event $h 'JOB_STATUS_CHANGE' # null
+
+ wait=yes _cleanup_qemu
+
+ kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")"
+}
+
+wait_for_job_and_quit $h $size
+
+echo
+echo === Start active commit to throttled QSD and exit qemu ===
+echo
+
+# Same as the above, but instead of mirroring, do an active commit
+
+_make_test_img $size
+
+set_up_throttled_qsd
+
+_launch_qemu \
+ --blockdev "{\"driver\": \"nbd\",
+ \"node-name\": \"target\",
+ \"server\": {
+ \"type\": \"unix\",
+ \"path\": \"$SOCK_DIR/qsd.sock\"
+ },
+ \"export\": \"target\"}" \
+ --blockdev file,node-name=source-proto,filename="$TEST_IMG" \
+ --blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \
+ --device virtio-blk,id=vblk,drive=source-fmt
+
+h=$QEMU_HANDLE
+_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
+
+_send_qemu_cmd $h \
+ '{"execute": "block-commit",
+ "arguments": {
+ "job-id": "commit",
+ "device": "source-fmt"
+ }}' \
+ 'return' \
+ | grep -v JOB_STATUS_CHANGE # Ignore these events during creation
+
+wait_for_job_and_quit $h $size
+
# success, all done
echo "*** done"
rm -f $seq.full
diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out
index 754a641258..70e8dd6c87 100644
--- a/tests/qemu-iotests/185.out
+++ b/tests/qemu-iotests/185.out
@@ -116,4 +116,52 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}}
No errors were found on the image.
+
+=== Start mirror to throttled QSD and exit qemu ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+{"execute": "qmp_capabilities"}
+{"return": {}}
+{"execute": "blockdev-mirror",
+ "arguments": {
+ "job-id": "mirror",
+ "device": "source-fmt",
+ "target": "target",
+ "sync": "top"
+ }}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "mirror", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
+--- Writing data to the virtio-blk device ---
+{"execute": "quit"}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}}
+
+=== Start active commit to throttled QSD and exit qemu ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+{"execute": "qmp_capabilities"}
+{"return": {}}
+{"execute": "block-commit",
+ "arguments": {
+ "job-id": "commit",
+ "device": "source-fmt"
+ }}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "commit", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
+--- Writing data to the virtio-blk device ---
+{"execute": "quit"}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "commit"}}
*** done
diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271
index 2775b4d130..c7c2cadda0 100755
--- a/tests/qemu-iotests/271
+++ b/tests/qemu-iotests/271
@@ -896,7 +896,7 @@ _make_test_img -o extended_l2=on 1M
# The second and third writes in _concurrent_io() are independent and may
# finish in either order, so filter the offset out to match both variants.
_concurrent_io | $QEMU_IO | _filter_qemu_io | \
- $SED -e 's/\(20480\|40960\)/OFFSET/'
+ sed -e 's/\(20480\|40960\)/OFFSET/'
_concurrent_verify | $QEMU_IO | _filter_qemu_io
# success, all done
diff --git a/tests/qemu-iotests/296 b/tests/qemu-iotests/296
index 099a3eeaa5..f80ef3434a 100755
--- a/tests/qemu-iotests/296
+++ b/tests/qemu-iotests/296
@@ -174,8 +174,12 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
}
result = vm.qmp('x-blockdev-amend', **args)
- assert result['return'] == {}
- vm.run_job('job0')
+ iotests.log(result)
+ # Run the job only if it was created
+ event = ('JOB_STATUS_CHANGE',
+ {'data': {'id': 'job0', 'status': 'created'}})
+ if vm.events_wait([event], timeout=0.0) is not None:
+ vm.run_job('job0')
# test that when the image opened by two qemu processes,
# neither of them can update the encryption keys
diff --git a/tests/qemu-iotests/296.out b/tests/qemu-iotests/296.out
index 42205cc981..609826eaa0 100644
--- a/tests/qemu-iotests/296.out
+++ b/tests/qemu-iotests/296.out
@@ -1,11 +1,9 @@
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
@@ -13,14 +11,9 @@ qemu-img: Failed to get shared "consistent read" lock
Is another process using the image [TEST_DIR/test.img]?
.
-Job failed: Block node is read-only
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
-{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
-{"return": {}}
-Job failed: Failed to get shared "consistent read" lock
-{"execute": "job-dismiss", "arguments": {"id": "job0"}}
+{"error": {"class": "GenericError", "desc": "Block node is read-only"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
+{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 75cc241580..21819db9c3 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -21,44 +21,44 @@
_filter_date()
{
- $SED -re 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
+ sed -Ee 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
}
_filter_vmstate_size()
{
- $SED -r -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
- -e 's/[0-9. ]{5} B/ SIZE/'
+ sed -E -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
+ -e 's/[0-9. ]{5} B/ SIZE/'
}
_filter_generated_node_ids()
{
- $SED -re 's/\#block[0-9]{3,}/NODE_NAME/'
+ sed -Ee 's/\#block[0-9]{3,}/NODE_NAME/'
}
_filter_qom_path()
{
- $SED -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
+ gsed -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
}
# replace occurrences of the actual TEST_DIR value with TEST_DIR
_filter_testdir()
{
- $SED -e "s#$TEST_DIR/#TEST_DIR/#g" \
- -e "s#$SOCK_DIR/#SOCK_DIR/#g" \
- -e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
+ sed -e "s#$TEST_DIR/#TEST_DIR/#g" \
+ -e "s#$SOCK_DIR/#SOCK_DIR/#g" \
+ -e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
}
# replace occurrences of the actual IMGFMT value with IMGFMT
_filter_imgfmt()
{
- $SED -e "s#$IMGFMT#IMGFMT#g"
+ sed -e "s#$IMGFMT#IMGFMT#g"
}
# Replace error message when the format is not supported and delete
# the output lines after the first one
_filter_qemu_img_check()
{
- $SED -e '/allocated.*fragmented.*compressed clusters/d' \
+ gsed -e '/allocated.*fragmented.*compressed clusters/d' \
-e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \
-e '/Image end offset: [0-9]\+/d'
}
@@ -66,13 +66,14 @@ _filter_qemu_img_check()
# Removes \r from messages
_filter_win32()
{
- $SED -e 's/\r//g'
+ gsed -e 's/\r//g'
}
# sanitize qemu-io output
_filter_qemu_io()
{
- _filter_win32 | $SED -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
+ _filter_win32 | \
+ gsed -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
-e "s/: line [0-9][0-9]*: *[0-9][0-9]*\( Aborted\| Killed\)/:\1/" \
-e "s/qemu-io> //g"
}
@@ -80,7 +81,7 @@ _filter_qemu_io()
# replace occurrences of QEMU_PROG with "qemu"
_filter_qemu()
{
- $SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
+ gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
-e $'s#\r##' # QEMU monitor uses \r\n line endings
}
@@ -89,7 +90,7 @@ _filter_qemu()
_filter_qmp()
{
_filter_win32 | \
- $SED -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
+ gsed -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
-e 's#^{"QMP":.*}$#QMP_VERSION#' \
-e '/^ "QMP": {\s*$/, /^ }\s*$/ c\' \
-e ' QMP_VERSION'
@@ -98,32 +99,32 @@ _filter_qmp()
# readline makes HMP command strings so long that git complains
_filter_hmp()
{
- $SED -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
+ gsed -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
-e $'s/\e\\[K//g'
}
# replace block job offset
_filter_block_job_offset()
{
- $SED -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
+ sed -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
}
# replace block job len
_filter_block_job_len()
{
- $SED -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
+ sed -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
}
# replace actual image size (depends on the host filesystem)
_filter_actual_image_size()
{
- $SED -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
+ gsed -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
}
# Filename filters for qemu-img create
_filter_img_create_filenames()
{
- $SED \
+ sed \
-e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
@@ -141,7 +142,7 @@ _do_filter_img_create()
# precedes ", fmt=") and the options part ($options, which starts
# with "fmt=")
# (And just echo everything before the first "^Formatting")
- readarray formatting_line < <($SED -e 's/, fmt=/\n/')
+ readarray formatting_line < <(gsed -e 's/, fmt=/\n/')
filename_part=${formatting_line[0]}
unset formatting_line[0]
@@ -168,11 +169,11 @@ _do_filter_img_create()
options=$(
echo "$options" \
| tr '\n' '\0' \
- | $SED -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
+ | gsed -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
| grep -a -e '^fmt' -e '^size' -e '^backing' -e '^preallocation' \
-e '^encryption' "${grep_data_file[@]}" \
| _filter_img_create_filenames \
- | $SED \
+ | sed \
-e 's/^\(fmt\)/0-\1/' \
-e 's/^\(size\)/1-\1/' \
-e 's/^\(backing\)/2-\1/' \
@@ -180,9 +181,9 @@ _do_filter_img_create()
-e 's/^\(encryption\)/4-\1/' \
-e 's/^\(preallocation\)/8-\1/' \
| LC_ALL=C sort \
- | $SED -e 's/^[0-9]-//' \
+ | sed -e 's/^[0-9]-//' \
| tr '\n\0' ' \n' \
- | $SED -e 's/^ *$//' -e 's/ *$//'
+ | sed -e 's/^ *$//' -e 's/ *$//'
)
if [ -n "$options" ]; then
@@ -208,7 +209,7 @@ _filter_img_create()
_filter_img_create_size()
{
- $SED -e "s# size=[0-9]\\+# size=SIZE#g"
+ gsed -e "s# size=[0-9]\\+# size=SIZE#g"
}
_filter_img_info()
@@ -222,7 +223,7 @@ _filter_img_info()
discard=0
regex_json_spec_start='^ *"format-specific": \{'
- $SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
+ gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
-e "s#$SOCK_DIR#SOCK_DIR#g" \
@@ -284,7 +285,7 @@ _filter_qemu_img_map()
data_file_filter=(-e "s#$data_file_pattern#\\1#")
fi
- $SED -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
+ sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
-e 's/"offset": [0-9]\+/"offset": OFFSET/g' \
-e 's/Mapped to *//' \
"${data_file_filter[@]}" \
@@ -298,7 +299,7 @@ _filter_nbd()
# receive callbacks sometimes, making them unreliable.
#
# Filter out the TCP port number since this changes between runs.
- $SED -e '/nbd\/.*\.c:/d' \
+ sed -e '/nbd\/.*\.c:/d' \
-e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \
-e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \
-e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#'
@@ -335,14 +336,14 @@ sys.stdout.write(result)'
_filter_authz_check_tls()
{
- $SED -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
+ sed -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
}
_filter_qcow2_compression_type_bit()
{
- $SED -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
- -e 's/\(incompatible_features.*\), 3\]/\1]/' \
- -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
+ gsed -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
+ -e 's/\(incompatible_features.*\), 3\]/\1]/' \
+ -e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
}
# make sure this script returns success
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 9885030b43..3bfd94c2e0 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -17,17 +17,28 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
-SED=
-for sed in sed gsed; do
- ($sed --version | grep 'GNU sed') > /dev/null 2>&1
- if [ "$?" -eq 0 ]; then
- SED=$sed
- break
- fi
-done
-if [ -z "$SED" ]; then
- echo "$0: GNU sed not found"
- exit 1
+# bail out, setting up .notrun file
+_notrun()
+{
+ echo "$*" >"$OUTPUT_DIR/$seq.notrun"
+ echo "$seq not run: $*"
+ status=0
+ exit
+}
+
+if ! command -v gsed >/dev/null 2>&1; then
+ if sed --version 2>&1 | grep -v 'not GNU sed' | grep 'GNU sed' > /dev/null;
+ then
+ gsed()
+ {
+ sed "$@"
+ }
+ else
+ gsed()
+ {
+ _notrun "GNU sed not available"
+ }
+ fi
fi
dd()
@@ -722,16 +733,6 @@ _img_info()
done
}
-# bail out, setting up .notrun file
-#
-_notrun()
-{
- echo "$*" >"$OUTPUT_DIR/$seq.notrun"
- echo "$seq not run: $*"
- status=0
- exit
-}
-
# bail out, setting up .casenotrun file
# The function _casenotrun() is used as a notifier. It is the
# caller's responsibility to actually skip the particular test.
@@ -920,7 +921,7 @@ _require_working_luks()
IMGFMT='luks' _rm_test_img "$file"
if [ $status != 0 ]; then
- reason=$(echo "$output" | grep "$file:" | $SED -e "s#.*$file: *##")
+ reason=$(echo "$output" | grep "$file:" | sed -e "s#.*$file: *##")
if [ -z "$reason" ]; then
reason="Failed to create a LUKS image"
fi
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6ba65eb1ff..6027780180 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -39,6 +39,7 @@ from contextlib import contextmanager
from qemu.machine import qtest
from qemu.qmp import QMPMessage
+from qemu.aqmp.legacy import QEMUMonitorProtocol
# Use this logger for logging messages directly from the iotests module
logger = logging.getLogger('qemu.iotests')
@@ -348,14 +349,30 @@ class QemuIoInteractive:
class QemuStorageDaemon:
- def __init__(self, *args: str, instance_id: str = 'a'):
+ _qmp: Optional[QEMUMonitorProtocol] = None
+ _qmpsock: Optional[str] = None
+ # Python < 3.8 would complain if this type were not a string literal
+ # (importing `annotations` from `__future__` would work; but not on <= 3.6)
+ _p: 'Optional[subprocess.Popen[bytes]]' = None
+
+ def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False):
assert '--pidfile' not in args
self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+ if qmp:
+ self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock')
+ all_args += ['--chardev',
+ f'socket,id=qmp-sock,path={self._qmpsock}',
+ '--monitor', 'qmp-sock']
+
+ self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True)
+
# Cannot use with here, we want the subprocess to stay around
# pylint: disable=consider-using-with
self._p = subprocess.Popen(all_args)
+ if self._qmp is not None:
+ self._qmp.accept()
while not os.path.exists(self.pidfile):
if self._p.poll() is not None:
cmd = ' '.join(all_args)
@@ -370,11 +387,24 @@ class QemuStorageDaemon:
assert self._pid == self._p.pid
+ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
+ -> QMPMessage:
+ assert self._qmp is not None
+ return self._qmp.cmd(cmd, args)
+
def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
self._p = None
+ if self._qmp:
+ self._qmp.close()
+
+ if self._qmpsock is not None:
+ try:
+ os.remove(self._qmpsock)
+ except OSError:
+ pass
try:
os.remove(self.pidfile)
except OSError:
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
new file mode 100755
index 0000000000..567e8cf21e
--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# group: rw
+#
+# Test graph changes while I/O is happening
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+from threading import Thread
+import iotests
+from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
+ QemuStorageDaemon
+
+
+top = os.path.join(iotests.test_dir, 'top.img')
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+
+def do_qemu_img_bench() -> None:
+ """
+ Do some I/O requests on `nbd_sock`.
+ """
+ assert qemu_img('bench', '-f', 'raw', '-c', '2000000',
+ f'nbd+unix:///node0?socket={nbd_sock}') == 0
+
+
+class TestGraphChangesWhileIO(QMPTestCase):
+ def setUp(self) -> None:
+ # Create an overlay that can be added at runtime on top of the
+ # null-co block node that will receive I/O
+ assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://',
+ top) == 0
+
+ # QSD instance with a null-co block node in an I/O thread,
+ # exported over NBD (on `nbd_sock`, export name "node0")
+ self.qsd = QemuStorageDaemon(
+ '--object', 'iothread,id=iothread0',
+ '--blockdev', 'null-co,node-name=node0,read-zeroes=true',
+ '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}',
+ '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' +
+ 'fixed-iothread=true,writable=true',
+ qmp=True
+ )
+
+ def tearDown(self) -> None:
+ self.qsd.stop()
+
+ def test_blockdev_add_while_io(self) -> None:
+ # Run qemu-img bench in the background
+ bench_thr = Thread(target=do_qemu_img_bench)
+ bench_thr.start()
+
+ # While qemu-img bench is running, repeatedly add and remove an
+ # overlay to/from node0
+ while bench_thr.is_alive():
+ result = self.qsd.qmp('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'overlay',
+ 'backing': 'node0',
+ 'file': {
+ 'driver': 'file',
+ 'filename': top
+ }
+ })
+ self.assert_qmp(result, 'return', {})
+
+ result = self.qsd.qmp('blockdev-del', {
+ 'node-name': 'overlay'
+ })
+ self.assert_qmp(result, 'return', {})
+
+ bench_thr.join()
+
+if __name__ == '__main__':
+ # Format must support raw backing files
+ iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
+ supported_protocols=['file'])
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
new file mode 100644
index 0000000000..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
diff --git a/tests/unit/rcutorture.c b/tests/unit/rcutorture.c
index de6f649058..495a4e6f42 100644
--- a/tests/unit/rcutorture.c
+++ b/tests/unit/rcutorture.c
@@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
@@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg)
struct rcu_stress *cp = qatomic_read(&rcu_stress_current);
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg)
{
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index aea660aeed..94718c9319 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -279,10 +279,10 @@ static void test_sync_op_check(BdrvChild *c)
g_assert_cmpint(ret, ==, -ENOTSUP);
}
-static void test_sync_op_invalidate_cache(BdrvChild *c)
+static void test_sync_op_activate(BdrvChild *c)
{
/* Early success: Image is not inactive */
- bdrv_invalidate_cache(c->bs, NULL);
+ bdrv_activate(c->bs, NULL);
}
@@ -325,8 +325,8 @@ const SyncOpTest sync_op_tests[] = {
.name = "/sync-op/check",
.fn = test_sync_op_check,
}, {
- .name = "/sync-op/invalidate_cache",
- .fn = test_sync_op_invalidate_cache,
+ .name = "/sync-op/activate",
+ .fn = test_sync_op_activate,
},
};
diff --git a/tests/unit/test-rcu-list.c b/tests/unit/test-rcu-list.c
index 49641e1936..64b81ae058 100644
--- a/tests/unit/test-rcu-list.c
+++ b/tests/unit/test-rcu-list.c
@@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg)
rcu_register_thread();
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
@@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg)
long long n_removed_local = 0;
struct list_element *el, *prev_el;
- *(struct rcu_reader_data **)arg = &rcu_reader;
+ *(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
diff --git a/util/async.c b/util/async.c
index 08d25feef5..2ea1172f3e 100644
--- a/util/async.c
+++ b/util/async.c
@@ -32,6 +32,7 @@
#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
#include "trace.h"
/***********************************************************/
@@ -675,12 +676,13 @@ void aio_context_release(AioContext *ctx)
qemu_rec_mutex_unlock(&ctx->lock);
}
-static __thread AioContext *my_aiocontext;
+QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
AioContext *qemu_get_current_aio_context(void)
{
- if (my_aiocontext) {
- return my_aiocontext;
+ AioContext *ctx = get_my_aiocontext();
+ if (ctx) {
+ return ctx;
}
if (qemu_mutex_iothread_locked()) {
/* Possibly in a vCPU thread. */
@@ -691,6 +693,6 @@ AioContext *qemu_get_current_aio_context(void)
void qemu_set_current_aio_context(AioContext *ctx)
{
- assert(!my_aiocontext);
- my_aiocontext = ctx;
+ assert(!get_my_aiocontext());
+ set_my_aiocontext(ctx);
}
diff --git a/util/rcu.c b/util/rcu.c
index c91da9f137..b6d6c71cff 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -65,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
/* Written to only by each individual reader. Read by both the reader and the
* writers.
*/
-__thread struct rcu_reader_data rcu_reader;
+QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
@@ -355,23 +355,23 @@ void drain_call_rcu(void)
void rcu_register_thread(void)
{
- assert(rcu_reader.ctr == 0);
+ assert(get_ptr_rcu_reader()->ctr == 0);
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+ QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
qemu_mutex_lock(&rcu_registry_lock);
- QLIST_REMOVE(&rcu_reader, node);
+ QLIST_REMOVE(get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_add_force_rcu_notifier(Notifier *n)
{
qemu_mutex_lock(&rcu_registry_lock);
- notifier_list_add(&rcu_reader.force_rcu, n);
+ notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
qemu_mutex_unlock(&rcu_registry_lock);
}